Skip to content

第43章 DevOps实践

学习目标

完成本章学习后,你将能够:

  1. 掌握Docker容器化:Dockerfile编写、镜像构建、容器管理
  2. 使用Docker Compose:多容器编排、网络配置、数据持久化
  3. 实现CI/CD流水线:GitHub Actions、GitLab CI、Jenkins
  4. 使用Kubernetes编排:Deployment、Service、ConfigMap、Ingress
  5. 实现基础设施即代码:Terraform、Ansible、配置管理
  6. 进行监控与日志:Prometheus、Grafana、ELK Stack
  7. 实现自动化部署:蓝绿部署、金丝雀发布、滚动更新
  8. 构建云原生应用:微服务架构、服务网格、云服务集成

43.1 Docker容器化

43.1.1 Dockerfile编写

python
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Any
from enum import Enum
import os


class BaseImage(Enum):
    """Preset base-image references for Dockerfiles.

    Each member's ``value`` is an image reference string in ``name:tag``
    form. Nothing in the visible part of this file consumes the enum;
    use ``BaseImage.X.value`` wherever a plain image string is expected.
    """

    # Python 3.11 image variants.
    PYTHON_311_SLIM = "python:3.11-slim"
    PYTHON_311_ALPINE = "python:3.11-alpine"
    PYTHON_311 = "python:3.11"
    # Plain Ubuntu image (no Python tag in the reference).
    UBUNTU_22_04 = "ubuntu:22.04"


@dataclass
class DockerInstruction:
    """One Dockerfile line: an instruction keyword and its argument text."""

    instruction: str
    arguments: str

    def to_string(self) -> str:
        """Return the line as ``<instruction> <arguments>``."""
        return "{} {}".format(self.instruction, self.arguments)


@dataclass
class DockerfileBuilder:
    """Fluent builder that renders Dockerfile text from collected settings.

    Every mutator returns ``self`` so calls can be chained. ``build()`` emits
    instructions in this order: FROM, MAINTAINER, LABEL, WORKDIR, ENV,
    COPY/RUN steps, EXPOSE, VOLUME, HEALTHCHECK, USER, ENTRYPOINT, CMD.

    COPY and RUN steps added through ``copy()`` and ``run()`` are emitted in
    the order those methods were called, so a "copy requirements, install,
    copy sources" sequence keeps its layer-cache friendly shape. When
    ``copy_files`` or ``run_commands`` were populated any other way
    (constructor arguments, direct list mutation) the call order is unknown
    and all COPY lines are emitted before all RUN lines.
    """

    base_image: str = "python:3.11-slim"
    workdir: str = "/app"
    maintainer: Optional[str] = None
    labels: Dict[str, str] = field(default_factory=dict)
    env_vars: Dict[str, str] = field(default_factory=dict)
    expose_ports: List[int] = field(default_factory=list)
    # (src, dest) pairs for COPY instructions.
    copy_files: List[tuple] = field(default_factory=list)
    run_commands: List[str] = field(default_factory=list)
    # Dict with keys "cmd", "interval", "timeout", "retries" (see set_healthcheck).
    healthcheck: Optional[Dict] = None
    entrypoint: Optional[str] = None
    cmd: Optional[str] = None
    user: Optional[str] = None
    volumes: List[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        # Call-order log of copy()/run(). Deliberately not a dataclass field
        # so the constructor signature, repr, equality and asdict() stay as
        # they were.
        self._steps: List[tuple] = []

    def _record(self, kind: str, payload: Any) -> None:
        """Append one COPY/RUN step to the call-order log."""
        # setdefault covers instances created without __post_init__.
        self.__dict__.setdefault("_steps", []).append((kind, payload))

    def add_label(self, key: str, value: str) -> "DockerfileBuilder":
        """Set (or overwrite) a LABEL key/value pair."""
        self.labels[key] = value
        return self

    def add_env(self, key: str, value: str) -> "DockerfileBuilder":
        """Set (or overwrite) an ENV variable."""
        self.env_vars[key] = value
        return self

    def expose(self, port: int) -> "DockerfileBuilder":
        """Add an EXPOSE instruction for ``port``."""
        self.expose_ports.append(port)
        return self

    def copy(self, src: str, dest: str) -> "DockerfileBuilder":
        """Add a ``COPY src dest`` step at the current position."""
        self.copy_files.append((src, dest))
        self._record("COPY", (src, dest))
        return self

    def run(self, command: str) -> "DockerfileBuilder":
        """Add a ``RUN command`` step at the current position."""
        self.run_commands.append(command)
        self._record("RUN", command)
        return self

    def add_volume(self, path: str) -> "DockerfileBuilder":
        """Add a VOLUME instruction for ``path``."""
        self.volumes.append(path)
        return self

    def set_healthcheck(
        self,
        cmd: str,
        interval: str = "30s",
        timeout: str = "5s",
        retries: int = 3
    ) -> "DockerfileBuilder":
        """Configure the HEALTHCHECK instruction (replaces any previous one)."""
        self.healthcheck = {
            "cmd": cmd,
            "interval": interval,
            "timeout": timeout,
            "retries": retries
        }
        return self

    def set_entrypoint(self, entrypoint: str) -> "DockerfileBuilder":
        """Set the ENTRYPOINT.

        Accepted forms: a plain command line (``"python main.py"``), a JSON
        array (``'["python", "main.py"]'``) or the bare inner part of a JSON
        array (``'"python", "main.py"'``). All render as exec form.
        """
        self.entrypoint = entrypoint
        return self

    def set_cmd(self, cmd: str) -> "DockerfileBuilder":
        """Set the CMD text; it is emitted verbatim after ``CMD``."""
        self.cmd = cmd
        return self

    def set_user(self, user: str) -> "DockerfileBuilder":
        """Set the USER the container runs as."""
        self.user = user
        return self

    @staticmethod
    def _escape(text: Any) -> str:
        """Escape backslashes and double quotes for a double-quoted token."""
        return str(text).replace("\\", "\\\\").replace('"', '\\"')

    @classmethod
    def _env_value(cls, value: Any) -> str:
        """Return an ENV value, double-quoted only when it contains whitespace."""
        text = str(value)
        pre_quoted = len(text) >= 2 and text[0] == text[-1] and text[0] in "\"'"
        if pre_quoted or not any(ch.isspace() for ch in text):
            return text
        return f'"{cls._escape(text)}"'

    @staticmethod
    def _entrypoint_args(entrypoint: str) -> str:
        """Return the text that follows ``ENTRYPOINT``."""
        import json
        import shlex

        stripped = entrypoint.strip()
        if stripped.startswith("["):
            # Already a JSON array: emit as given instead of double-wrapping.
            return stripped
        if stripped.startswith('"'):
            # Inner part of a JSON array: wrap it in brackets.
            return f"[{entrypoint}]"
        try:
            return json.dumps(shlex.split(stripped), ensure_ascii=False)
        except ValueError:
            # Unbalanced quotes: cannot tokenise, fall back to shell form.
            return stripped

    def _copy_run_steps(self) -> List[tuple]:
        """Return COPY/RUN steps in call order, or grouped when order is unknown."""
        logged = list(getattr(self, "_steps", None) or [])
        logged_copies = [item for kind, item in logged if kind == "COPY"]
        logged_runs = [item for kind, item in logged if kind == "RUN"]
        # Trust the log only when it accounts for exactly the current lists.
        if logged_copies == list(self.copy_files) and logged_runs == list(self.run_commands):
            return logged
        grouped = [("COPY", pair) for pair in self.copy_files]
        grouped.extend(("RUN", command) for command in self.run_commands)
        return grouped

    def build(self) -> str:
        """Render the Dockerfile and return it as a newline-joined string."""
        lines = []

        lines.append(f"FROM {self.base_image}")

        if self.maintainer:
            # NOTE: MAINTAINER is deprecated by Docker in favour of a
            # "maintainer" LABEL; kept so existing output does not change.
            lines.append(f"MAINTAINER {self.maintainer}")

        if self.labels:
            label_strs = [
                f'"{self._escape(k)}"="{self._escape(v)}"'
                for k, v in self.labels.items()
            ]
            lines.append(f"LABEL {' '.join(label_strs)}")

        lines.append(f"WORKDIR {self.workdir}")

        for key, value in self.env_vars.items():
            lines.append(f"ENV {key}={self._env_value(value)}")

        for kind, payload in self._copy_run_steps():
            if kind == "COPY":
                src, dest = payload
                lines.append(f"COPY {src} {dest}")
            else:
                lines.append(f"RUN {payload}")

        for port in self.expose_ports:
            lines.append(f"EXPOSE {port}")

        for volume in self.volumes:
            lines.append(f"VOLUME {volume}")

        if self.healthcheck:
            hc = self.healthcheck
            lines.append(
                f"HEALTHCHECK --interval={hc['interval']} --timeout={hc['timeout']} --retries={hc['retries']} "
                f"CMD {hc['cmd']}"
            )

        if self.user:
            lines.append(f"USER {self.user}")

        if self.entrypoint:
            lines.append(f"ENTRYPOINT {self._entrypoint_args(self.entrypoint)}")

        if self.cmd:
            lines.append(f"CMD {self.cmd}")

        return "\n".join(lines)


class DockerfileTemplates:
    """Ready-made Dockerfile texts for common Python deployment shapes."""

    @staticmethod
    def python_web_app(
        app_name: str = "app",
        port: int = 8000,
        requirements_file: str = "requirements.txt"
    ) -> str:
        """Return a Dockerfile for a Python web app with an HTTP health check.

        Args:
            app_name: Used as the ``app`` label and as the workdir ``/<app_name>``.
            port: Port that is exposed and probed at ``/health``.
            requirements_file: Build-context path of the requirements file.
        """
        builder = DockerfileBuilder(
            base_image="python:3.11-slim",
            workdir=f"/{app_name}"
        )

        builder.add_label("maintainer", "devops@example.com")
        builder.add_label("app", app_name)

        builder.add_env("PYTHONDONTWRITEBYTECODE", "1")
        builder.add_env("PYTHONUNBUFFERED", "1")

        # curl is installed because the health check below calls it and the
        # slim base image does not ship it.
        builder.run("apt-get update && apt-get install -y --no-install-recommends gcc curl && rm -rf /var/lib/apt/lists/*")

        builder.copy(requirements_file, ".")
        # COPY into "." keeps only the file name, so install by that name
        # rather than assuming it is always "requirements.txt".
        copied_name = requirements_file.rsplit("/", 1)[-1]
        builder.run(f"pip install --no-cache-dir -r {copied_name}")

        builder.copy(".", ".")

        builder.expose(port)

        builder.set_healthcheck(
            cmd=f"curl -f http://localhost:{port}/health || exit 1",
            interval="30s",
            timeout="5s",
            retries=3
        )

        builder.set_cmd('["python", "main.py"]')

        return builder.build()

    @staticmethod
    def python_api_service(
        app_name: str = "api",
        port: int = 8000
    ) -> str:
        """Return a Dockerfile for a uvicorn API that runs as a non-root user.

        Args:
            app_name: Workdir is ``/<app_name>``; ownership of that directory
                is handed to the ``appuser`` account.
            port: Port that is exposed and passed to uvicorn.
        """
        workdir = f"/{app_name}"
        builder = DockerfileBuilder(
            base_image="python:3.11-slim",
            workdir=workdir
        )

        builder.add_env("APP_ENV", "production")

        builder.run("groupadd -r appuser && useradd -r -g appuser appuser")

        builder.copy("requirements.txt", ".")
        builder.run("pip install --no-cache-dir -r requirements.txt")

        builder.copy("app", "./app")
        builder.copy("main.py", ".")

        # Must target the actual workdir: a hard-coded /app does not exist
        # when app_name is anything else (including the default "api").
        builder.run(f"chown -R appuser:appuser {workdir}")

        builder.set_user("appuser")
        builder.expose(port)

        builder.set_cmd(f'["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "{port}"]')

        return builder.build()

    @staticmethod
    def multi_stage_build(
        app_name: str = "app",
        port: int = 8000
    ) -> str:
        """Return a two-stage Dockerfile.

        The first stage installs dependencies into ``/build/deps``; the
        second copies them into the slim image's site-packages directory.
        """
        lines = [
            "FROM python:3.11 AS builder",
            "WORKDIR /build",
            "COPY requirements.txt .",
            "RUN pip install --no-cache-dir --target=/build/deps -r requirements.txt",
            "",
            "FROM python:3.11-slim",
            f"WORKDIR /{app_name}",
            "COPY --from=builder /build/deps /usr/local/lib/python3.11/site-packages",
            "COPY . .",
            f"EXPOSE {port}",
            'CMD ["python", "main.py"]'
        ]
        return "\n".join(lines)


class DockerImageManager:
    """Builds ``docker`` CLI command strings; nothing is executed here.

    Caller-supplied tokens are passed through ``shlex.quote`` so the returned
    strings stay well-formed when handed to a POSIX shell. Tokens made only
    of shell-safe characters are returned unchanged.
    """

    def __init__(self, registry: str = "docker.io"):
        # Registry host prefixed to image names; empty means "no prefix".
        self.registry = registry

    def _image_ref(self, image_name: str, tag: str) -> str:
        """Return the shell-quoted ``[registry/]name:tag`` reference."""
        import shlex

        # Without this guard an empty registry would yield "/name:tag".
        qualified = f"{self.registry}/{image_name}" if self.registry else image_name
        return shlex.quote(f"{qualified}:{tag}")

    def build_image(
        self,
        dockerfile_path: str,
        image_name: str,
        tag: str = "latest",
        build_args: Optional[Dict[str, str]] = None
    ) -> str:
        """Return a ``docker build`` command using ``.`` as the build context.

        Args:
            dockerfile_path: Passed to ``-f``.
            image_name: Image name without registry or tag.
            tag: Image tag.
            build_args: Optional ``--build-arg`` key/value pairs.
        """
        import shlex

        parts = [f"docker build -t {self._image_ref(image_name, tag)}"]

        for key, value in (build_args or {}).items():
            # Quote the whole KEY=value token so values with spaces survive.
            parts.append(f"--build-arg {shlex.quote(f'{key}={value}')}")

        parts.append(f"-f {shlex.quote(dockerfile_path)} .")
        return " ".join(parts)

    def tag_image(self, source: str, target: str) -> str:
        """Return a ``docker tag`` command; both references are used as given."""
        import shlex

        return f"docker tag {shlex.quote(source)} {shlex.quote(target)}"

    def push_image(self, image_name: str, tag: str = "latest") -> str:
        """Return a ``docker push`` command for the registry-qualified image."""
        return f"docker push {self._image_ref(image_name, tag)}"

    def pull_image(self, image_name: str, tag: str = "latest") -> str:
        """Return a ``docker pull`` command for the registry-qualified image."""
        return f"docker pull {self._image_ref(image_name, tag)}"

    def remove_image(self, image_name: str, tag: str = "latest") -> str:
        """Return a ``docker rmi`` command for the registry-qualified image."""
        return f"docker rmi {self._image_ref(image_name, tag)}"

    def list_images(self) -> str:
        """Return the ``docker images`` command."""
        return "docker images"

    def inspect_image(self, image_name: str) -> str:
        """Return a ``docker inspect`` command; ``image_name`` is used as given."""
        import shlex

        return f"docker inspect {shlex.quote(image_name)}"

43.1.2 Docker Compose

python
from typing import Dict, List, Optional, Any
from dataclasses import dataclass, field
import yaml


@dataclass
class DockerService:
    """Settings for one entry under ``services`` in a Compose file."""

    image: Optional[str] = None
    build: Optional[str] = None
    container_name: Optional[str] = None
    ports: List[str] = field(default_factory=list)
    environment: Dict[str, str] = field(default_factory=dict)
    env_file: List[str] = field(default_factory=list)
    volumes: List[str] = field(default_factory=list)
    networks: List[str] = field(default_factory=list)
    depends_on: List[str] = field(default_factory=list)
    restart: str = "unless-stopped"
    command: Optional[str] = None
    entrypoint: Optional[str] = None
    healthcheck: Optional[Dict] = None
    labels: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Return the service mapping, leaving out every falsy setting.

        Keys appear in a fixed order; each key has the same name as the
        attribute it is read from.
        """
        emit_order = (
            "image",
            "build",
            "container_name",
            "ports",
            "environment",
            "env_file",
            "volumes",
            "networks",
            "depends_on",
            "restart",
            "command",
            "entrypoint",
            "healthcheck",
            "labels",
        )
        rendered = {}
        for attr in emit_order:
            setting = getattr(self, attr)
            if setting:
                rendered[attr] = setting
        return rendered


@dataclass
class DockerNetwork:
    """Settings for one entry under ``networks`` in a Compose file."""

    driver: str = "bridge"
    ipam: Optional[Dict] = None

    def to_dict(self) -> Dict:
        """Return the network mapping; ``ipam`` is included only when truthy."""
        optional_part = {"ipam": self.ipam} if self.ipam else {}
        return {"driver": self.driver, **optional_part}


@dataclass
class DockerVolume:
    """Settings for one entry under ``volumes`` in a Compose file."""

    driver: str = "local"
    driver_opts: Optional[Dict] = None

    def to_dict(self) -> Dict:
        """Return the volume mapping; ``driver_opts`` is included only when truthy."""
        optional_part = {"driver_opts": self.driver_opts} if self.driver_opts else {}
        return {"driver": self.driver, **optional_part}


class DockerComposeBuilder:
    """Collects services, networks and volumes and renders a Compose document."""

    def __init__(self, version: str = "3.8"):
        self.version = version
        self.services: Dict[str, DockerService] = {}
        self.networks: Dict[str, DockerNetwork] = {}
        self.volumes: Dict[str, DockerVolume] = {}
        # configs and secrets are stored as raw mappings and emitted untouched.
        self.configs: Dict[str, Any] = {}
        self.secrets: Dict[str, Any] = {}

    def add_service(self, name: str, service: DockerService) -> "DockerComposeBuilder":
        """Register ``service`` under ``name``, replacing any earlier entry."""
        self.services[name] = service
        return self

    def add_network(self, name: str, network: DockerNetwork = None) -> "DockerComposeBuilder":
        """Register a network; a default ``DockerNetwork`` is used when none is given."""
        self.networks[name] = network if network else DockerNetwork()
        return self

    def add_volume(self, name: str, volume: DockerVolume = None) -> "DockerComposeBuilder":
        """Register a volume; a default ``DockerVolume`` is used when none is given."""
        self.volumes[name] = volume if volume else DockerVolume()
        return self

    def build(self) -> Dict:
        """Return the Compose document as a dict.

        ``version`` is always present. The other top-level sections are
        added, in a fixed order, only when they are non-empty.
        """
        document = {"version": self.version}

        # Sections whose entries know how to serialise themselves.
        object_sections = (
            ("services", self.services),
            ("networks", self.networks),
            ("volumes", self.volumes),
        )
        for section, entries in object_sections:
            if entries:
                document[section] = {
                    entry_name: entry.to_dict()
                    for entry_name, entry in entries.items()
                }

        # Sections that are already plain mappings.
        for section, raw in (("configs", self.configs), ("secrets", self.secrets)):
            if raw:
                document[section] = raw

        return document

    def to_yaml(self) -> str:
        """Serialise ``build()`` to block-style YAML, keeping key order."""
        document = self.build()
        return yaml.dump(document, default_flow_style=False, sort_keys=False)


class DockerComposeTemplates:
    """Ready-made Compose documents, returned as YAML text."""

    @staticmethod
    def web_app_stack(
        app_name: str = "webapp",
        db_password: str = "secret"
    ) -> str:
        """Return a Compose file with a web app, PostgreSQL and Redis.

        Args:
            app_name: Prefix for container and volume names; also the
                database name.
            db_password: PostgreSQL password. The default is a placeholder
                and should be overridden for any real deployment.
        """
        from urllib.parse import quote

        builder = DockerComposeBuilder()

        # Percent-encode the password so characters such as "@" or "/" do
        # not corrupt the connection URL.
        url_password = quote(db_password, safe="")

        web_service = DockerService(
            build=".",
            container_name=f"{app_name}_web",
            ports=["8000:8000"],
            environment={
                "DATABASE_URL": f"postgresql://postgres:{url_password}@db:5432/{app_name}",
                "REDIS_URL": "redis://redis:6379/0"
            },
            volumes=[f"{app_name}_data:/app/data"],
            networks=["frontend", "backend"],
            depends_on=["db", "redis"],
            healthcheck={
                "test": ["CMD", "curl", "-f", "http://localhost:8000/health"],
                "interval": "30s",
                "timeout": "10s",
                "retries": 3
            }
        )
        builder.add_service("web", web_service)

        db_service = DockerService(
            image="postgres:15-alpine",
            container_name=f"{app_name}_db",
            environment={
                "POSTGRES_DB": app_name,
                "POSTGRES_PASSWORD": db_password
            },
            volumes=[f"{app_name}_db:/var/lib/postgresql/data"],
            networks=["backend"]
        )
        builder.add_service("db", db_service)

        redis_service = DockerService(
            image="redis:7-alpine",
            container_name=f"{app_name}_redis",
            volumes=[f"{app_name}_redis:/data"],
            networks=["backend"]
        )
        builder.add_service("redis", redis_service)

        builder.add_network("frontend")
        builder.add_network("backend")

        builder.add_volume(f"{app_name}_data")
        builder.add_volume(f"{app_name}_db")
        builder.add_volume(f"{app_name}_redis")

        return builder.to_yaml()

    @staticmethod
    def microservices_stack(db_password: str = "secret") -> str:
        """Return a Compose file for a gateway plus three backend services.

        Args:
            db_password: Password set on both PostgreSQL containers. The
                default is a placeholder and should be overridden.
        """
        builder = DockerComposeBuilder()

        api_gateway = DockerService(
            image="nginx:alpine",
            container_name="api_gateway",
            ports=["80:80", "443:443"],
            volumes=["./nginx.conf:/etc/nginx/nginx.conf:ro"],
            networks=["frontend"],
            depends_on=["user_service", "product_service", "order_service"]
        )
        builder.add_service("gateway", api_gateway)

        user_service = DockerService(
            build="./services/user",
            container_name="user_service",
            environment={"SERVICE_PORT": "8001"},
            networks=["frontend", "backend"],
            depends_on=["user_db"]
        )
        builder.add_service("user_service", user_service)

        product_service = DockerService(
            build="./services/product",
            container_name="product_service",
            environment={"SERVICE_PORT": "8002"},
            networks=["frontend", "backend"],
            depends_on=["product_db"]
        )
        builder.add_service("product_service", product_service)

        order_service = DockerService(
            build="./services/order",
            container_name="order_service",
            environment={"SERVICE_PORT": "8003"},
            networks=["frontend", "backend"],
            depends_on=["order_db", "rabbitmq"]
        )
        builder.add_service("order_service", order_service)

        # The postgres image refuses to initialise without a superuser
        # password, so both databases get one.
        user_db = DockerService(
            image="postgres:15-alpine",
            container_name="user_db",
            environment={"POSTGRES_DB": "users", "POSTGRES_PASSWORD": db_password},
            volumes=["user_db_data:/var/lib/postgresql/data"],
            networks=["backend"]
        )
        builder.add_service("user_db", user_db)

        # The official MongoDB image is published as "mongo".
        product_db = DockerService(
            image="mongo:6",
            container_name="product_db",
            volumes=["product_db_data:/data/db"],
            networks=["backend"]
        )
        builder.add_service("product_db", product_db)

        order_db = DockerService(
            image="postgres:15-alpine",
            container_name="order_db",
            environment={"POSTGRES_DB": "orders", "POSTGRES_PASSWORD": db_password},
            volumes=["order_db_data:/var/lib/postgresql/data"],
            networks=["backend"]
        )
        builder.add_service("order_db", order_db)

        rabbitmq = DockerService(
            image="rabbitmq:3-management-alpine",
            container_name="rabbitmq",
            ports=["15672:15672"],
            networks=["backend"]
        )
        builder.add_service("rabbitmq", rabbitmq)

        builder.add_network("frontend")
        builder.add_network("backend")

        # Named volumes referenced by a service must be declared at the top
        # level, otherwise Compose rejects the file.
        builder.add_volume("user_db_data")
        builder.add_volume("product_db_data")
        builder.add_volume("order_db_data")

        return builder.to_yaml()

43.2 CI/CD流水线

43.2.1 GitHub Actions

python
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any
import yaml


@dataclass
class GitHubActionStep:
    """One step of a GitHub Actions job."""

    name: str
    run: Optional[str] = None
    uses: Optional[str] = None
    # Emitted under the workflow key "with" (a reserved word in Python).
    with_params: Dict[str, Any] = field(default_factory=dict)
    env: Dict[str, str] = field(default_factory=dict)
    # Emitted under the workflow key "if".
    if_condition: Optional[str] = None

    def to_dict(self) -> Dict:
        """Return the step mapping: ``name`` first, then each truthy optional key."""
        optional_entries = (
            ("run", self.run),
            ("uses", self.uses),
            ("with", self.with_params),
            ("env", self.env),
            ("if", self.if_condition),
        )
        rendered = {"name": self.name}
        rendered.update((key, value) for key, value in optional_entries if value)
        return rendered


@dataclass
class GitHubActionJob:
    """One job of a GitHub Actions workflow."""

    runs_on: str = "ubuntu-latest"
    steps: List[GitHubActionStep] = field(default_factory=list)
    needs: List[str] = field(default_factory=list)
    env: Dict[str, str] = field(default_factory=dict)
    strategy: Optional[Dict] = None
    services: Dict[str, Any] = field(default_factory=dict)

    def add_step(self, step: GitHubActionStep) -> "GitHubActionJob":
        """Append ``step`` to the job and return ``self`` for chaining."""
        self.steps.append(step)
        return self

    def to_dict(self) -> Dict:
        """Return the job mapping.

        ``runs-on`` is always emitted; the remaining keys are emitted only
        when truthy, and ``steps`` always comes last.
        """
        rendered = {"runs-on": self.runs_on}

        optional_entries = (
            ("needs", self.needs),
            ("env", self.env),
            ("strategy", self.strategy),
            ("services", self.services),
        )
        for key, value in optional_entries:
            if value:
                rendered[key] = value

        if self.steps:
            rendered["steps"] = [one_step.to_dict() for one_step in self.steps]

        return rendered


class GitHubActionsBuilder:
    """Assembles a GitHub Actions workflow and renders it as YAML."""

    def __init__(self, name: str):
        self.name = name
        self.on_events: Dict[str, Any] = {}
        self.jobs: Dict[str, GitHubActionJob] = {}
        self.env: Dict[str, str] = {}

    def on_push(self, branches: List[str] = None) -> "GitHubActionsBuilder":
        """Trigger on push; a falsy ``branches`` stores ``None`` (no filter)."""
        self.on_events["push"] = {"branches": branches} if branches else None
        return self

    def on_pull_request(self, branches: List[str] = None) -> "GitHubActionsBuilder":
        """Trigger on pull request; a falsy ``branches`` stores ``None`` (no filter)."""
        self.on_events["pull_request"] = {"branches": branches} if branches else None
        return self

    def on_schedule(self, cron: str) -> "GitHubActionsBuilder":
        """Trigger on a single cron schedule, replacing any earlier one."""
        self.on_events["schedule"] = [{"cron": cron}]
        return self

    def add_job(self, name: str, job: GitHubActionJob) -> "GitHubActionsBuilder":
        """Register ``job`` under ``name``, replacing any earlier entry."""
        self.jobs[name] = job
        return self

    def add_env(self, key: str, value: str) -> "GitHubActionsBuilder":
        """Set a workflow-level environment variable."""
        self.env[key] = value
        return self

    def build(self) -> Dict:
        """Return the workflow as a dict.

        ``name`` is always present; ``on``, ``env`` and ``jobs`` follow in
        that order when non-empty.
        """
        document = {"name": self.name}

        for key, value in (("on", self.on_events), ("env", self.env)):
            if value:
                document[key] = value

        if self.jobs:
            rendered_jobs = {}
            for job_name, job in self.jobs.items():
                rendered_jobs[job_name] = job.to_dict()
            document["jobs"] = rendered_jobs

        return document

    def to_yaml(self) -> str:
        """Serialise ``build()`` to block-style YAML, keeping key order."""
        document = self.build()
        return yaml.dump(document, default_flow_style=False, sort_keys=False)


class GitHubActionsTemplates:
    """Ready-made GitHub Actions workflows, returned as YAML text."""

    @staticmethod
    def python_ci() -> str:
        """Return a CI workflow: checkout, set up Python 3.11, test, upload coverage."""
        builder = GitHubActionsBuilder("Python CI")

        builder.on_push(["main", "develop"])
        builder.on_pull_request(["main"])

        test_job = GitHubActionJob(runs_on="ubuntu-latest")

        test_job.add_step(GitHubActionStep(
            name="Checkout code",
            uses="actions/checkout@v4"
        ))

        test_job.add_step(GitHubActionStep(
            name="Set up Python",
            uses="actions/setup-python@v5",
            with_params={
                "python-version": "3.11",
                "cache": "pip"
            }
        ))

        test_job.add_step(GitHubActionStep(
            name="Install dependencies",
            run="pip install -r requirements.txt\npip install pytest pytest-cov"
        ))

        test_job.add_step(GitHubActionStep(
            name="Run tests",
            run="pytest --cov=app --cov-report=xml"
        ))

        test_job.add_step(GitHubActionStep(
            name="Upload coverage",
            uses="codecov/codecov-action@v3",
            with_params={"file": "./coverage.xml"}
        ))

        builder.add_job("test", test_job)

        return builder.to_yaml()

    @staticmethod
    def docker_build_push() -> str:
        """Return a workflow that builds an image and pushes it to Docker Hub.

        Credentials come from the ``DOCKER_USERNAME`` and ``DOCKER_PASSWORD``
        repository secrets.
        """
        builder = GitHubActionsBuilder("Docker Build and Push")

        builder.on_push(["main"])

        build_job = GitHubActionJob(runs_on="ubuntu-latest")

        build_job.add_step(GitHubActionStep(
            name="Checkout code",
            uses="actions/checkout@v4"
        ))

        build_job.add_step(GitHubActionStep(
            name="Set up Docker Buildx",
            uses="docker/setup-buildx-action@v3"
        ))

        build_job.add_step(GitHubActionStep(
            name="Login to Docker Hub",
            uses="docker/login-action@v3",
            with_params={
                "username": "${{ secrets.DOCKER_USERNAME }}",
                "password": "${{ secrets.DOCKER_PASSWORD }}"
            }
        ))

        build_job.add_step(GitHubActionStep(
            name="Build and push",
            uses="docker/build-push-action@v5",
            with_params={
                "context": ".",
                "push": True,
                "tags": "${{ secrets.DOCKER_USERNAME }}/app:${{ github.sha }},${{ secrets.DOCKER_USERNAME }}/app:latest",
                "cache-from": "type=registry,ref=${{ secrets.DOCKER_USERNAME }}/app:buildcache",
                "cache-to": "type=registry,ref=${{ secrets.DOCKER_USERNAME }}/app:buildcache,mode=max"
            }
        ))

        builder.add_job("build", build_job)

        return builder.to_yaml()

    @staticmethod
    def deploy_to_kubernetes() -> str:
        """Return a workflow that applies ``k8s/`` and waits for the rollout.

        The kubeconfig is read, base64-encoded, from the ``KUBE_CONFIG``
        repository secret.
        """
        builder = GitHubActionsBuilder("Deploy to Kubernetes")

        builder.on_push(["main"])

        # No ``needs``: this workflow defines only the deploy job, and a
        # dependency on a job that is not in the same workflow makes the
        # workflow file invalid.
        deploy_job = GitHubActionJob(runs_on="ubuntu-latest")

        deploy_job.add_step(GitHubActionStep(
            name="Checkout code",
            uses="actions/checkout@v4"
        ))

        deploy_job.add_step(GitHubActionStep(
            name="Set up kubectl",
            uses="azure/setup-kubectl@v3"
        ))

        # Hand the secret over as an environment variable instead of
        # expanding it inside the script text, and keep the file private.
        deploy_job.add_step(GitHubActionStep(
            name="Configure kubectl",
            run=(
                "mkdir -p ~/.kube\n"
                "echo \"$KUBE_CONFIG\" | base64 -d > ~/.kube/config\n"
                "chmod 600 ~/.kube/config"
            ),
            env={"KUBE_CONFIG": "${{ secrets.KUBE_CONFIG }}"}
        ))

        deploy_job.add_step(GitHubActionStep(
            name="Deploy to Kubernetes",
            run="kubectl apply -f k8s/"
        ))

        deploy_job.add_step(GitHubActionStep(
            name="Wait for rollout",
            run="kubectl rollout status deployment/app -n default --timeout=300s"
        ))

        builder.add_job("deploy", deploy_job)

        return builder.to_yaml()

43.2.2 GitLab CI

python
class GitLabCIBuilder:
    """Assembles a ``.gitlab-ci.yml`` document and renders it as YAML."""

    def __init__(self):
        self.stages: List[str] = []
        self.variables: Dict[str, str] = {}
        self.jobs: Dict[str, Dict] = {}
        self.default_config: Dict = {}

    def add_stage(self, stage: str) -> "GitLabCIBuilder":
        """Append ``stage`` unless it is already listed."""
        already_known = stage in self.stages
        if not already_known:
            self.stages.append(stage)
        return self

    def add_variable(self, key: str, value: str) -> "GitLabCIBuilder":
        """Set a pipeline-level variable."""
        self.variables[key] = value
        return self

    def add_job(
        self,
        name: str,
        stage: str,
        image: str = None,
        script: List[str] = None,
        only: List[str] = None,
        except_branches: List[str] = None,
        variables: Dict[str, str] = None,
        artifacts: Dict = None,
        services: List[str] = None,
        before_script: List[str] = None,
        after_script: List[str] = None
    ) -> "GitLabCIBuilder":
        """Register a job, adding its stage to the stage list if needed.

        Only truthy options are written into the job mapping;
        ``except_branches`` is stored under the key ``except``.
        """
        self.add_stage(stage)

        optional_entries = (
            ("image", image),
            ("script", script),
            ("only", only),
            ("except", except_branches),
            ("variables", variables),
            ("artifacts", artifacts),
            ("services", services),
            ("before_script", before_script),
            ("after_script", after_script),
        )
        definition = {"stage": stage}
        definition.update((key, value) for key, value in optional_entries if value)

        self.jobs[name] = definition
        return self

    def set_default(self, image: str = None, before_script: List[str] = None) -> "GitLabCIBuilder":
        """Set entries of the ``default`` section; falsy arguments are ignored."""
        for key, value in (("image", image), ("before_script", before_script)):
            if value:
                self.default_config[key] = value
        return self

    def build(self) -> Dict:
        """Return the pipeline as a dict.

        Non-empty ``stages``, ``variables`` and ``default`` come first,
        followed by every job as a top-level key.
        """
        document = {}

        leading_sections = (
            ("stages", self.stages),
            ("variables", self.variables),
            ("default", self.default_config),
        )
        for key, value in leading_sections:
            if value:
                document[key] = value

        if self.jobs:
            document.update(self.jobs)

        return document

    def to_yaml(self) -> str:
        """Serialise ``build()`` to block-style YAML, keeping key order."""
        document = self.build()
        return yaml.dump(document, default_flow_style=False, sort_keys=False)


class GitLabCITemplates:
    """Ready-made GitLab CI pipelines, returned as YAML text."""

    @staticmethod
    def python_pipeline() -> str:
        """Return a test / lint / build / deploy pipeline for a Python project.

        Each job gets the tooling it actually needs:

        * ``test`` and ``lint`` use the default ``python:3.11`` image and
          install the project requirements themselves.
        * ``build`` uses a Docker image with a Docker-in-Docker service and
          logs in to the GitLab registry before pushing.
        * ``deploy`` uses an image that ships ``kubectl``. Cluster
          credentials are not configured here and must be supplied by the
          project.
        """
        builder = GitLabCIBuilder()

        builder.add_variable("PIP_CACHE_DIR", "$CI_PROJECT_DIR/.cache/pip")

        # No default before_script: a pipeline-wide "pip install" would also
        # run in the build and deploy jobs, whose images have no pip.
        builder.set_default(image="python:3.11")

        install_requirements = "pip install -r requirements.txt"

        builder.add_job(
            name="test",
            stage="test",
            script=[
                install_requirements,
                "pip install pytest pytest-cov",
                "pytest --cov=app --cov-report=xml --cov-report=html"
            ],
            artifacts={
                "paths": ["htmlcov/", "coverage.xml"],
                "reports": {"coverage_report": {"coverage_format": "cobertura", "path": "coverage.xml"}}
            }
        )

        builder.add_job(
            name="lint",
            stage="test",
            script=[
                install_requirements,
                "pip install flake8 black",
                "flake8 app/",
                "black --check app/"
            ]
        )

        builder.add_job(
            name="build",
            stage="build",
            # The Python image has no docker CLI; use the Docker image plus
            # a dind service so "docker build" has a daemon to talk to.
            image="docker:24",
            services=["docker:24-dind"],
            before_script=[
                "echo \"$CI_REGISTRY_PASSWORD\" | docker login -u \"$CI_REGISTRY_USER\" --password-stdin \"$CI_REGISTRY\""
            ],
            script=[
                "docker build -t $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA .",
                "docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA"
            ],
            only=["main"],
            variables={"DOCKER_DRIVER": "overlay2", "DOCKER_TLS_CERTDIR": "/certs"}
        )

        builder.add_job(
            name="deploy",
            stage="deploy",
            # Mapping form of "image" so the image's kubectl entrypoint can
            # be cleared and GitLab can run the script through a shell.
            # add_job stores the value unchanged.
            image={"name": "bitnami/kubectl:latest", "entrypoint": [""]},
            script=[
                "kubectl set image deployment/app app=$CI_REGISTRY_IMAGE:$CI_COMMIT_SHA"
            ],
            only=["main"]
        )

        return builder.to_yaml()

43.3 Kubernetes编排

43.3.1 Deployment与Service

python
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any
import yaml


@dataclass
class Container:
    """Settings for one container inside a pod template."""

    name: str
    image: str
    ports: List[int] = field(default_factory=list)
    env: Dict[str, str] = field(default_factory=dict)
    env_from: List[Dict] = field(default_factory=list)
    resources: Dict = field(default_factory=dict)
    volume_mounts: List[Dict] = field(default_factory=list)
    liveness_probe: Optional[Dict] = None
    readiness_probe: Optional[Dict] = None
    command: List[str] = field(default_factory=list)
    args: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Return the container mapping with camelCase manifest keys.

        ``name`` and ``image`` are always emitted; every other key is
        emitted only when its attribute is truthy.
        """
        rendered = {"name": self.name, "image": self.image}

        # ports and env are reshaped into lists of small mappings.
        if self.ports:
            rendered["ports"] = [{"containerPort": number} for number in self.ports]
        if self.env:
            rendered["env"] = [
                {"name": var_name, "value": self.env[var_name]}
                for var_name in self.env
            ]

        # The remaining settings are copied through under their manifest key.
        passthrough = (
            ("envFrom", self.env_from),
            ("resources", self.resources),
            ("volumeMounts", self.volume_mounts),
            ("livenessProbe", self.liveness_probe),
            ("readinessProbe", self.readiness_probe),
            ("command", self.command),
            ("args", self.args),
        )
        for manifest_key, value in passthrough:
            if value:
                rendered[manifest_key] = value

        return rendered


@dataclass
class Deployment:
    """Settings for an ``apps/v1`` Deployment manifest."""

    name: str
    replicas: int = 1
    # Used for metadata labels, the selector and the pod template labels.
    labels: Dict[str, str] = field(default_factory=dict)
    containers: List[Container] = field(default_factory=list)
    volumes: List[Dict] = field(default_factory=list)
    image_pull_secrets: List[str] = field(default_factory=list)
    node_selector: Dict[str, str] = field(default_factory=dict)
    tolerations: List[Dict] = field(default_factory=list)
    affinity: Optional[Dict] = None

    def add_container(self, container: Container) -> "Deployment":
        """Append ``container`` to the pod template and return ``self``."""
        self.containers.append(container)
        return self

    def add_volume(self, name: str, config: Dict) -> "Deployment":
        """Append a pod volume built from ``name`` merged with ``config``."""
        self.volumes.append({"name": name, **config})
        return self

    def _effective_labels(self) -> Dict[str, str]:
        """Return a fresh copy of the labels, defaulting to ``{"app": name}``.

        A Deployment needs a non-empty selector that matches the pod
        template labels, so an empty ``labels`` falls back to a label
        derived from the deployment name.
        """
        return dict(self.labels) if self.labels else {"app": self.name}

    def to_dict(self) -> Dict:
        """Return the Deployment manifest as a dict.

        Optional pod-spec keys (volumes, imagePullSecrets, nodeSelector,
        tolerations, affinity) are emitted only when truthy.
        """
        pod_spec = {
            "containers": [c.to_dict() for c in self.containers]
        }

        if self.volumes:
            pod_spec["volumes"] = self.volumes

        if self.image_pull_secrets:
            pod_spec["imagePullSecrets"] = [
                {"name": s} for s in self.image_pull_secrets
            ]

        if self.node_selector:
            pod_spec["nodeSelector"] = self.node_selector

        if self.tolerations:
            pod_spec["tolerations"] = self.tolerations

        if self.affinity:
            pod_spec["affinity"] = self.affinity

        # Each location gets its own dict. Reusing one object three times
        # makes yaml.dump emit anchors/aliases (&id001 / *id001) and lets a
        # later edit of one place silently change the other two.
        spec = {
            "replicas": self.replicas,
            "selector": {
                "matchLabels": self._effective_labels()
            },
            "template": {
                "metadata": {"labels": self._effective_labels()},
                "spec": pod_spec
            }
        }

        return {
            "apiVersion": "apps/v1",
            "kind": "Deployment",
            "metadata": {
                "name": self.name,
                "labels": self._effective_labels()
            },
            "spec": spec
        }


@dataclass
class Service:
    """Settings for a ``v1`` Service manifest."""

    name: str
    selector: Dict[str, str]
    ports: List[Dict]
    service_type: str = "ClusterIP"
    labels: Dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> Dict:
        """Return the Service manifest as a dict."""
        metadata = {"name": self.name, "labels": self.labels}
        spec = {
            "type": self.service_type,
            "selector": self.selector,
            "ports": self.ports,
        }
        manifest = {"apiVersion": "v1", "kind": "Service"}
        manifest["metadata"] = metadata
        manifest["spec"] = spec
        return manifest


class KubernetesManifestBuilder:
    """Fluent collector of Kubernetes manifest dictionaries.

    Every ``add_*`` method appends one manifest to ``self.manifests`` and
    returns the builder so calls can be chained; ``to_yaml`` renders the
    collected manifests as one multi-document YAML string.
    """

    def __init__(self):
        # Manifests in insertion order; this is also the output order.
        self.manifests: List[Dict] = []

    def add_deployment(self, deployment: "Deployment") -> "KubernetesManifestBuilder":
        """Append ``deployment.to_dict()`` and return the builder."""
        self.manifests.append(deployment.to_dict())
        return self

    def add_service(self, service: "Service") -> "KubernetesManifestBuilder":
        """Append ``service.to_dict()`` and return the builder."""
        self.manifests.append(service.to_dict())
        return self

    def add_configmap(self, name: str, data: Dict, labels: Dict = None) -> "KubernetesManifestBuilder":
        """Append a ``v1`` ConfigMap manifest.

        Args:
            name: Value for ``metadata.name``.
            data: Mapping stored as-is under ``data``.
            labels: Optional ``metadata.labels``; omitted when falsy.

        Returns:
            The builder.
        """
        configmap = {
            "apiVersion": "v1",
            "kind": "ConfigMap",
            "metadata": {"name": name},
            "data": data
        }
        if labels:
            configmap["metadata"]["labels"] = labels
        self.manifests.append(configmap)
        return self

    def add_secret(self, name: str, data: Dict, secret_type: str = "Opaque") -> "KubernetesManifestBuilder":
        """Append a ``v1`` Secret manifest with base64-encoded values.

        Args:
            name: Value for ``metadata.name``.
            data: Mapping of key to secret value. ``str`` values are encoded
                as UTF-8 first; ``bytes``/``bytearray`` values (for example
                certificate material) are encoded as they are.
            secret_type: Value for the manifest's ``type`` field.

        Returns:
            The builder.
        """
        import base64

        encoded_data = {}
        for key, value in data.items():
            # Binary payloads have no ``.encode()``; pass them through untouched.
            raw = value if isinstance(value, (bytes, bytearray)) else value.encode("utf-8")
            encoded_data[key] = base64.b64encode(raw).decode("ascii")

        self.manifests.append({
            "apiVersion": "v1",
            "kind": "Secret",
            "metadata": {"name": name},
            "type": secret_type,
            "data": encoded_data
        })
        return self

    def add_ingress(
        self,
        name: str,
        rules: List[Dict],
        annotations: Dict = None,
        tls: List[Dict] = None
    ) -> "KubernetesManifestBuilder":
        """Append a ``networking.k8s.io/v1`` Ingress manifest.

        Args:
            name: Value for ``metadata.name``.
            rules: Stored as-is under ``spec.rules``.
            annotations: Optional ``metadata.annotations``; omitted when falsy.
            tls: Optional ``spec.tls``; omitted when falsy.

        Returns:
            The builder.
        """
        ingress = {
            "apiVersion": "networking.k8s.io/v1",
            "kind": "Ingress",
            "metadata": {"name": name},
            "spec": {"rules": rules}
        }
        if annotations:
            ingress["metadata"]["annotations"] = annotations
        if tls:
            ingress["spec"]["tls"] = tls
        self.manifests.append(ingress)
        return self

    def to_yaml(self) -> str:
        """Dump all manifests as YAML documents separated by ``---`` lines.

        Keys keep their insertion order (``sort_keys=False``). With no
        manifests collected the result is an empty string. Uses the ``yaml``
        name from the enclosing module scope.
        """
        docs = [yaml.dump(m, default_flow_style=False, sort_keys=False) for m in self.manifests]
        return "---\n".join(docs)

43.4 知识图谱

43.4.1 DevOps工具链架构

┌─────────────────────────────────────────────────────────────────────┐
│                      DevOps工具链全景图                              │
├─────────────────────────────────────────────────────────────────────┤
│  ┌─────────────────────────────────────────────────────────────┐   │
│  │                      代码管理 (Code)                          │   │
│  │  ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐       │   │
│  │  │   Git    │ │ GitHub   │ │ GitLab   │ │Bitbucket │       │   │
│  │  │版本控制  │ │ 代码托管  │ │ CI/CD    │ │ 企业托管 │       │   │
│  │  └──────────┘ └──────────┘ └──────────┘ └──────────┘       │   │
│  └─────────────────────────────────────────────────────────────┘   │
│                                │                                    │
│                                ▼                                    │
│  ┌─────────────────────────────────────────────────────────────┐   │
│  │                      构建与测试 (Build & Test)                │   │
│  │  ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐       │   │
│  │  │  Docker  │ │ Jenkins  │ │ GitHub   │ │ GitLab   │       │   │
│  │  │ 容器构建  │ │ 流水线   │ │ Actions  │ │ CI       │       │   │
│  │  └──────────┘ └──────────┘ └──────────┘ └──────────┘       │   │
│  └─────────────────────────────────────────────────────────────┘   │
│                                │                                    │
│                                ▼                                    │
│  ┌─────────────────────────────────────────────────────────────┐   │
│  │                      部署与发布 (Deploy)                       │   │
│  │  ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐       │   │
│  │  │Kubernetes│ │  Helm    │ │ ArgoCD   │ │ Spinnaker│       │   │
│  │  │ 容器编排  │ │ 包管理   │ │ GitOps   │ │ 持续部署 │       │   │
│  │  └──────────┘ └──────────┘ └──────────┘ └──────────┘       │   │
│  └─────────────────────────────────────────────────────────────┘   │
│                                │                                    │
│                                ▼                                    │
│  ┌─────────────────────────────────────────────────────────────┐   │
│  │                      监控与运维 (Monitor)                      │   │
│  │  ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐       │   │
│  │  │Prometheus│ │ Grafana  │ │   ELK    │ │ Jaeger   │       │   │
│  │  │ 指标监控  │ │ 可视化   │ │ 日志分析 │ │ 链路追踪 │       │   │
│  │  └──────────┘ └──────────┘ └──────────┘ └──────────┘       │   │
│  └─────────────────────────────────────────────────────────────┘   │
│                                │                                    │
│                                ▼                                    │
│  ┌─────────────────────────────────────────────────────────────┐   │
│  │                      基础设施 (Infrastructure)                │   │
│  │  ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐       │   │
│  │  │ Terraform│ │ Ansible  │ │   AWS    │ │  Azure   │       │   │
│  │  │ IaC      │ │ 配置管理 │ │ 云服务   │ │ 云服务   │       │   │
│  │  └──────────┘ └──────────┘ └──────────┘ └──────────┘       │   │
│  └─────────────────────────────────────────────────────────────┘   │
└─────────────────────────────────────────────────────────────────────┘

43.4.2 CI/CD流水线流程

┌─────────────────────────────────────────────────────────────────────┐
│                      CI/CD流水线工作流程                             │
├─────────────────────────────────────────────────────────────────────┤
│                                                                     │
│   ┌──────────┐                                                      │
│   │ 代码提交  │                                                      │
│   │ git push │                                                      │
│   └────┬─────┘                                                      │
│        │                                                            │
│        ▼                                                            │
│   ┌──────────────────────────────────────────────────────────┐     │
│   │                    持续集成 (CI)                          │     │
│   │  ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐    │     │
│   │  │ 代码检查  │ │ 单元测试  │ │ 构建镜像  │ │ 安全扫描  │    │     │
│   │  │ Lint    │ │ Unit Test│ │ Docker   │ │ SAST     │    │     │
│   │  └──────────┘ └──────────┘ └──────────┘ └──────────┘    │     │
│   └──────────────────────────────────────────────────────────┘     │
│        │                                                            │
│        │ 通过所有检查                                               │
│        ▼                                                            │
│   ┌──────────────────────────────────────────────────────────┐     │
│   │                    制品管理                               │     │
│   │  ┌──────────┐ ┌──────────┐ ┌──────────┐                 │     │
│   │  │ 镜像仓库  │ │ 制品存储  │ │ 版本标记  │                 │     │
│   │  │ Registry │ │ Artifacts│ │ Tagging  │                 │     │
│   │  └──────────┘ └──────────┘ └──────────┘                 │     │
│   └──────────────────────────────────────────────────────────┘     │
│        │                                                            │
│        ▼                                                            │
│   ┌──────────────────────────────────────────────────────────┐     │
│   │                    持续部署 (CD)                          │     │
│   │  ┌──────────┐ ┌──────────┐ ┌──────────┐                 │     │
│   │  │ 部署开发  │ │ 部署测试  │ │ 部署生产  │                 │     │
│   │  │  Dev    │ │  Stage   │ │  Prod    │                 │     │
│   │  └──────────┘ └──────────┘ └──────────┘                 │     │
│   └──────────────────────────────────────────────────────────┘     │
│        │                                                            │
│        ▼                                                            │
│   ┌──────────────────────────────────────────────────────────┐     │
│   │                    监控反馈                               │     │
│   │  ┌──────────┐ ┌──────────┐ ┌──────────┐                 │     │
│   │  │ 健康检查  │ │ 性能监控  │ │ 告警通知  │                 │     │
│   │  │ Health   │ │ Metrics  │ │ Alerting │                 │     │
│   │  └──────────┘ └──────────┘ └──────────┘                 │     │
│   └──────────────────────────────────────────────────────────┘     │
│                                                                     │
└─────────────────────────────────────────────────────────────────────┘

43.5 技术选型指南

43.5.1 CI/CD平台选型

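| 平台 | 适用场景 | 特点 | 自托管 | 推荐指数 |
| --- | --- | --- | --- | --- |
| GitHub Actions | GitHub项目 | 集成度高、生态丰富 | 可选 | ★★★★★ |
| GitLab CI | GitLab项目 | 一体化、功能完整 | 支持 | ★★★★★ |
| Jenkins | 企业级、复杂流程 | 插件丰富、高度可定制 | 支持 | ★★★★☆ |
| CircleCI | 云原生项目 | 快速、并行构建 | 不支持 | ★★★★☆ |
| Drone | 容器化项目 | 轻量级、Docker原生 | 支持 | ★★★☆☆ |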

43.5.2 容器编排平台选型

平台适用规模复杂度功能完整性推荐指数
Kubernetes大规模生产★★★★★★★★★★
Docker Swarm中小规模★★★☆☆★★★☆☆
Nomad混合负载★★★★☆★★★★☆
ECSAWS生态★★★★☆★★★★☆

43.5.3 监控方案选型

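| 方案 | 指标监控 | 日志管理 | 链路追踪 | 学习曲线 |
| --- | --- | --- | --- | --- |
| Prometheus + Grafana | ★★★★★ | - | - | |
| ELK Stack | - | ★★★★★ | - | |
| Jaeger | - | - | ★★★★★ | |
| Datadog | ★★★★★ | ★★★★★ | ★★★★★ | |
| SkyWalking | ★★★★☆ | ★★★☆☆ | ★★★★★ | |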

43.6 常见问题与解决方案

43.6.1 Docker镜像优化

python
from dataclasses import dataclass
from typing import List, Dict, Optional

class DockerImageOptimizer:
    """Generators for an optimized Dockerfile, a .dockerignore and build tips."""

    @staticmethod
    def generate_optimized_dockerfile(
        base_image: str = "python:3.11-slim",
        app_name: str = "app"
    ) -> str:
        """Return the text of a multi-stage Dockerfile.

        Args:
            base_image: Image used for both the builder and the final stage.
            app_name: Name of the working directory (``/<app_name>``) in the
                final image.

        Returns:
            The Dockerfile contents as a string.

        Notes:
            * Dependencies are installed with ``pip --prefix=/install`` and
              copied onto ``/usr/local``, so the path does not depend on the
              Python minor version of ``base_image``.
            * The health check uses the Python standard library because slim
              Python images do not ship ``curl``.
            * ``groupadd``/``useradd`` and the ``/usr/local`` prefix assume a
              Debian-based official Python image — verify before using other
              image families (e.g. Alpine).
        """
        return f'''# 多阶段构建
FROM {base_image} AS builder
WORKDIR /build
COPY requirements.txt .
RUN pip install --no-cache-dir --prefix=/install -r requirements.txt

# 最终镜像
FROM {base_image}
WORKDIR /{app_name}

# 安全:非root用户
RUN groupadd -r appuser && useradd -r -g appuser appuser

# 复制依赖
COPY --from=builder /install /usr/local

# 复制应用代码
COPY --chown=appuser:appuser . .

# 设置环境变量
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# 切换用户
USER appuser

# 健康检查
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \\
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=3)" || exit 1

CMD ["python", "main.py"]
'''

    @staticmethod
    def get_optimization_tips() -> List[str]:
        """Return the list of image-optimization tips (a new list per call)."""
        return [
            "使用多阶段构建减小镜像体积",
            "使用.alpine或.slim基础镜像",
            "合并RUN命令减少层数",
            "使用.dockerignore排除不必要文件",
            "不要在镜像中存储敏感信息",
            "使用非root用户运行应用",
            "利用构建缓存加速构建",
            "定期更新基础镜像版本"
        ]

    @staticmethod
    def create_dockerignore() -> str:
        """Return the contents of a .dockerignore file, one pattern per line.

        The text begins with a blank line because the literal opens with a
        newline.
        """
        return '''
__pycache__
*.pyc
*.pyo
*.pyd
.Python
*.so
.env
.venv
venv/
ENV/
.git
.gitignore
.docker
Dockerfile
docker-compose.yml
README.md
.pytest_cache
.coverage
htmlcov/
*.log
*.tmp
node_modules/
'''


class DockerSecurityChecker:
    """Catalogue of Docker security checks and their suggested fixes."""

    # Each entry has the keys "check", "description" and "fix".
    SECURITY_CHECKS = [
        {
            "check": "非root用户运行",
            "description": "确保容器以非root用户运行",
            "fix": "在Dockerfile中添加: USER appuser"
        },
        {
            "check": "镜像签名验证",
            "description": "启用Docker Content Trust",
            "fix": "设置环境变量: DOCKER_CONTENT_TRUST=1"
        },
        {
            "check": "只读根文件系统",
            "description": "使容器根文件系统只读",
            "fix": "docker run --read-only ..."
        },
        {
            "check": "资源限制",
            "description": "设置CPU和内存限制",
            "fix": "docker run --memory=512m --cpus=1 ..."
        },
        {
            "check": "安全选项",
            "description": "禁用不必要的特权",
            "fix": "docker run --cap-drop=ALL --cap-add=NET_BIND_SERVICE ..."
        }
    ]

    @classmethod
    def get_security_recommendations(cls) -> List[Dict]:
        """Return the security checks as a new list of new dicts.

        Copies are returned so that callers who edit the result cannot alter
        the shared class-level ``SECURITY_CHECKS`` catalogue.
        """
        return [dict(entry) for entry in cls.SECURITY_CHECKS]

43.6.2 CI/CD流水线优化

python
from typing import List, Dict, Any
from dataclasses import dataclass

@dataclass
class PipelineStage:
    """Settings for a single pipeline stage."""

    # Stage name; the only required field.
    name: str
    # Parallel-execution flag; off by default.
    parallel: bool = False
    # Allow-failure flag; off by default.
    allow_failure: bool = False
    # Optional cache mapping; None means no cache mapping was supplied.
    cache: Dict[str, str] = None

class CIPipelineOptimizer:
    """Holds pipeline stages and cache paths and returns ready-made CI snippets."""

    def __init__(self):
        # Both collections start empty.
        self.caches: Dict[str, str] = {}
        self.stages: List[PipelineStage] = []

    def add_cache(self, key: str, path: str):
        """Store ``path`` under ``key`` in ``self.caches``, replacing any previous entry."""
        self.caches.update({key: path})

    def optimize_parallel(self) -> Dict:
        """Return a build-matrix ``strategy`` block with fail-fast turned off."""
        matrix = {
            "python-version": ["3.9", "3.10", "3.11"],
            "os": ["ubuntu-latest", "macos-latest"],
        }
        strategy = {"matrix": matrix, "fail-fast": False}
        return {"strategy": strategy}

    def generate_cache_config(self) -> Dict:
        """Return fixed cache settings for pip and Docker.

        The result does not depend on ``self.caches``; entries registered via
        ``add_cache`` are not read here.
        """
        os_prefix = "${{ runner.os }}"
        pip_cache = {
            "key": os_prefix + "-pip-${{ hashFiles('**/requirements.txt') }}",
            "restore-keys": os_prefix + "-pip-",
            "path": "~/.cache/pip",
        }
        docker_cache = {
            "key": os_prefix + "-docker-${{ hashFiles('Dockerfile') }}",
            "path": "/tmp/.buildx-cache",
        }
        return {"pip": pip_cache, "docker": docker_cache}

    @staticmethod
    def get_optimization_strategies() -> List[Dict]:
        """Return the optimization strategies as dicts with the keys
        ``strategy``, ``description`` and ``example``."""
        columns = ("strategy", "description", "example")
        rows = (
            ("并行执行", "将独立的任务并行执行", "使用matrix策略并行测试多个版本"),
            ("缓存依赖", "缓存pip、npm等依赖", "actions/cache@v3"),
            ("增量构建", "只构建变更的部分", "使用Docker layer缓存"),
            ("条件执行", "只在特定条件下执行", "使用paths过滤触发"),
            ("复用工作流", "使用可复用的工作流", "使用composite actions"),
        )
        return [dict(zip(columns, row)) for row in rows]


class DeploymentStrategy:
    """Static descriptions of deployment strategies.

    Every method returns a new dict with the keys ``strategy``, ``steps``,
    ``advantages`` and ``disadvantages``.
    """

    @staticmethod
    def blue_green_deployment() -> Dict:
        """Describe the blue-green strategy."""
        steps = [
            "1. 部署新版本到Green环境",
            "2. 运行冒烟测试验证Green",
            "3. 切换流量到Green环境",
            "4. 监控新版本运行状态",
            "5. 如有问题快速回滚到Blue",
        ]
        return dict(
            strategy="blue-green",
            steps=steps,
            advantages=["零停机", "快速回滚"],
            disadvantages=["需要双倍资源"],
        )

    @staticmethod
    def canary_deployment() -> Dict:
        """Describe the canary strategy."""
        steps = [
            "1. 部署新版本到少量实例",
            "2. 路由小部分流量到新版本",
            "3. 监控关键指标",
            "4. 逐步增加流量比例",
            "5. 全量发布或回滚",
        ]
        return dict(
            strategy="canary",
            steps=steps,
            advantages=["风险可控", "渐进式发布"],
            disadvantages=["部署时间较长"],
        )

    @staticmethod
    def rolling_deployment() -> Dict:
        """Describe the rolling-update strategy."""
        steps = [
            "1. 逐个更新实例",
            "2. 等待新实例就绪",
            "3. 继续更新下一批",
            "4. 直到所有实例更新完成",
        ]
        return dict(
            strategy="rolling",
            steps=steps,
            advantages=["资源效率高", "平滑过渡"],
            disadvantages=["回滚较慢"],
        )

43.6.3 Kubernetes配置最佳实践

python
from dataclasses import dataclass, field
from typing import Dict, List, Optional
import yaml

class KubernetesBestPractices:
    """Template snippets and a checklist for Kubernetes configuration.

    All methods are static and return newly built objects. Object names, the
    selector label and the probe endpoints are keyword-only parameters whose
    defaults reproduce the fixed values the templates used previously.
    """

    @staticmethod
    def resource_limits_template() -> Dict:
        """Return a ``resources`` block with memory/CPU requests and limits."""
        return {
            "resources": {
                "requests": {
                    "memory": "128Mi",
                    "cpu": "100m"
                },
                "limits": {
                    "memory": "512Mi",
                    "cpu": "500m"
                }
            }
        }

    @staticmethod
    def health_probe_template(
        *,
        port: int = 8000,
        liveness_path: str = "/health",
        readiness_path: str = "/ready"
    ) -> Dict:
        """Return HTTP ``livenessProbe`` and ``readinessProbe`` settings.

        Args:
            port: Port used by both probes.
            liveness_path: HTTP path of the liveness probe.
            readiness_path: HTTP path of the readiness probe.
        """
        return {
            "livenessProbe": {
                "httpGet": {"path": liveness_path, "port": port},
                "initialDelaySeconds": 30,
                "periodSeconds": 10,
                "timeoutSeconds": 5,
                "failureThreshold": 3
            },
            "readinessProbe": {
                "httpGet": {"path": readiness_path, "port": port},
                "initialDelaySeconds": 5,
                "periodSeconds": 5,
                "timeoutSeconds": 3,
                "failureThreshold": 3
            }
        }

    @staticmethod
    def security_context_template() -> Dict:
        """Return a ``securityContext`` block: non-root user 1000, read-only
        root filesystem, no privilege escalation, all capabilities dropped."""
        return {
            "securityContext": {
                "runAsNonRoot": True,
                "runAsUser": 1000,
                "readOnlyRootFilesystem": True,
                "allowPrivilegeEscalation": False,
                "capabilities": {
                    "drop": ["ALL"]
                }
            }
        }

    @staticmethod
    def pod_disruption_budget_template(
        min_available: int = 1,
        *,
        name: str = "app-pdb",
        app_label: str = "myapp"
    ) -> Dict:
        """Return a ``policy/v1`` PodDisruptionBudget manifest.

        Args:
            min_available: Value for ``spec.minAvailable``.
            name: Value for ``metadata.name``.
            app_label: Value of the ``app`` label the selector matches.
        """
        return {
            "apiVersion": "policy/v1",
            "kind": "PodDisruptionBudget",
            "metadata": {"name": name},
            "spec": {
                "minAvailable": min_available,
                "selector": {
                    "matchLabels": {"app": app_label}
                }
            }
        }

    @staticmethod
    def horizontal_pod_autoscaler_template(
        min_replicas: int = 2,
        max_replicas: int = 10,
        target_cpu: int = 70,
        *,
        name: str = "app-hpa",
        target_name: str = "myapp"
    ) -> Dict:
        """Return an ``autoscaling/v2`` HorizontalPodAutoscaler manifest.

        Args:
            min_replicas: Value for ``spec.minReplicas``.
            max_replicas: Value for ``spec.maxReplicas``.
            target_cpu: Average CPU utilization target of the single metric.
            name: Value for ``metadata.name``.
            target_name: Name of the ``apps/v1`` Deployment to scale.
        """
        return {
            "apiVersion": "autoscaling/v2",
            "kind": "HorizontalPodAutoscaler",
            "metadata": {"name": name},
            "spec": {
                "scaleTargetRef": {
                    "apiVersion": "apps/v1",
                    "kind": "Deployment",
                    "name": target_name
                },
                "minReplicas": min_replicas,
                "maxReplicas": max_replicas,
                "metrics": [{
                    "type": "Resource",
                    "resource": {
                        "name": "cpu",
                        "target": {
                            "type": "Utilization",
                            "averageUtilization": target_cpu
                        }
                    }
                }]
            }
        }

    @staticmethod
    def get_best_practices_checklist() -> List[str]:
        """Return the best-practices checklist items (a new list per call)."""
        return [
            "✓ 设置资源请求和限制",
            "✓ 配置健康检查探针",
            "✓ 使用安全上下文",
            "✓ 配置Pod中断预算",
            "✓ 设置水平自动伸缩",
            "✓ 使用ConfigMap管理配置",
            "✓ 使用Secret管理敏感信息",
            "✓ 配置网络策略",
            "✓ 设置Pod反亲和性",
            "✓ 启用日志收集"
        ]

43.7 本章小结

本章详细介绍了Python DevOps的核心概念与实践方法:

  1. Docker容器化:Dockerfile编写、镜像构建、容器管理
  2. Docker Compose:多容器编排、网络配置、数据持久化
  3. CI/CD流水线:GitHub Actions、GitLab CI、自动化部署
  4. Kubernetes编排:Deployment、Service、ConfigMap、Ingress
  5. 基础设施即代码:配置管理、自动化部署

练习题

  1. 为一个Python Web应用编写完整的Dockerfile
  2. 使用Docker Compose编排一个包含Web、数据库、缓存的应用栈
  3. 编写GitHub Actions工作流,实现自动化测试和部署
  4. 为一个微服务应用编写Kubernetes部署清单
  5. 实现一个蓝绿部署脚本,支持零停机部署

扩展阅读

Python技术丛书 - 江苏省宿城中等专业学校