diff --git a/config/sit/firewall.yaml b/config/sit/firewall.yaml index 30c29134..0d7c58b7 100644 --- a/config/sit/firewall.yaml +++ b/config/sit/firewall.yaml @@ -1,7 +1,33 @@ firewall_rules: - - name: allow-ssh-web + - name: allow-web-inbound + enabled: true + vpc_name: dev-vpc-1 source_ranges: ["0.0.0.0/0"] egress_ranges: ["10.0.0.0/16"] allow: - protocol: tcp - ports: ["22", "80", "443"] + ports: ["80", "443"] + + - name: dev-vpc-1-default-inbound + enabled: true + vpc_name: dev-vpc-1 + description: Allow ICMP, SSH, and VXLAN from all sources + source_ranges: ["0.0.0.0/0"] + allow: + - protocol: icmp + - protocol: tcp + ports: ["22"] + - protocol: udp + ports: ["4789"] + + - name: dev-vpc-2-default-inbound + enabled: true + vpc_name: dev-vpc-2 + description: Allow ICMP, SSH, and VXLAN from all sources + source_ranges: ["0.0.0.0/0"] + allow: + - protocol: icmp + - protocol: tcp + ports: ["22"] + - protocol: udp + ports: ["4789"] diff --git a/config/sit/instances.yaml b/config/sit/instances.yaml index b6fdc644..fa3af917 100644 --- a/config/sit/instances.yaml +++ b/config/sit/instances.yaml @@ -1,8 +1,9 @@ instances: - name: master-1 - ami: ubuntu-22.04 # ✅ 可用 ami-xxx 或关键词(如 ubuntu-22.04) + ami: ubuntu-24.04 # ✅ 可用 ami-xxx 或关键词(如 ubuntu-22.04) type: t3a.xlarge disk_size_gb: 20 + sg_names: ["dev-vpc-1-default-inbound"] subnet: dev-vpc-1-public-subnet-1 lifecycle: spot # 可选: ondemand(默认)或 spot ttl: 1h # 可选: 标记生命周期(不会自动销毁) @@ -11,8 +12,9 @@ instances: associate_public_ip: true # ✅ 明确配置是否需要公网 IP - name: slave-1 - ami: ubuntu-22.04 + ami: ubuntu-24.04 type: t3.nano + sg_names: ["dev-vpc-2-default-inbound"] disk_size_gb: 20 subnet: dev-vpc-2-public-subnet-1 lifecycle: spot @@ -22,10 +24,11 @@ instances: associate_public_ip: true - name: agent-1 - ami: ubuntu-22.04 + ami: ubuntu-24.04 type: t3.nano disk_size_gb: 20 subnet: dev-vpc-1-private-subnet-1 + sg_names: ["dev-vpc-1-default-inbound"] lifecycle: spot ttl: 1h env: sit @@ -33,10 +36,11 @@ instances: 
associate_public_ip: true - name: agent-2 - ami: ubuntu-22.04 + ami: ubuntu-24.04 type: t3.nano disk_size_gb: 20 subnet: dev-vpc-2-private-subnet-1 + sg_names: ["dev-vpc-2-default-inbound"] lifecycle: spot ttl: 1h env: sit diff --git a/iac_modules/pulumi/__main__.py b/iac_modules/pulumi/__main__.py index d26d0ac6..b0884541 100644 --- a/iac_modules/pulumi/__main__.py +++ b/iac_modules/pulumi/__main__.py @@ -59,16 +59,41 @@ else: pulumi.log.warn("⏭️ 跳过 VPC 创建") # ======================== -# ✅ [模块] Security Group +# ✅ [模块] 多个 Security Group # ======================== + +# ✅ 存储 VPC 结果(名字 → 资源) +vpc_map = {vpc_name: result["vpc"] for vpc_name, result in vpc_results.items()} + firewall_rules = config.get("firewall_rules", []) -if firewall_rules and vpc and config.get("security_group", {}).get("enabled", True): - sg = create_security_group(vpc.id, firewall_rules[0]) - global_dependencies.append(sg) - pulumi.log.info("✅ Security Group 已创建") +security_groups = {} + +if firewall_rules and config.get("security_group", {}).get("enabled", True): + for rule in firewall_rules: + if not rule.get("enabled", True): + pulumi.log.warn(f"⏭️ 跳过未启用的 SG: {rule.get('name')}") + continue + + vpc_name = rule.get("vpc_name") + if not vpc_name or vpc_name not in vpc_map: + pulumi.log.warn(f"❌ 未找到指定 VPC: {vpc_name},跳过 {rule.get('name')}") + continue + + vpc_resource = vpc_map[vpc_name] + + sg = create_security_group(vpc_resource.id, rule) + name = rule.get("name", "sg-unnamed") + security_groups[name] = sg + global_dependencies.append(sg) + + # 确保 SG 创建等待 VPC 完成 + pulumi.log.info(f"✅ Security Group '{name}' 已绑定 VPC: {vpc_name}") + + pulumi.export("security_groups", {k: sg.id for k, sg in security_groups.items()}) else: pulumi.log.warn("⏭️ 跳过 Security Group 创建") + # ======================== # ✅ [模块] SSH Key Pair # ======================== @@ -88,6 +113,10 @@ if key_pairs: else: pulumi.log.warn("⏭️ 跳过 KeyPair 创建") +# ======================== +# ✅ [模块] EC2 实例部署 +# ======================== + # 
======================== # ✅ [模块] EC2 实例部署 # ======================== @@ -95,13 +124,26 @@ instances_conf = config.get("instances", []) ec2_outputs = {} if instances_conf and config.get("ec2", {}).get("enabled", True): + # ✅ 遍历每个实例,按 sg_names 匹配对应 Security Group ID 列表 + def resolve_security_group_ids(instance_conf, sg_map): + sg_ids = [] + for name in instance_conf.get("sg_names", []): + sg = sg_map.get(name) + if sg: + sg_ids.append(sg.id) + else: + pulumi.log.warn(f"⚠️ 实例 {instance_conf['name']} 引用了未知 SG: {name}") + return sg_ids + + # ✅ 批量传入所有实例配置 ec2_outputs = create_instances( instances_conf, subnets, - sg, # ✅ 注意这里传的是资源对象 + security_groups, # ✅ 多 SG 映射 sg_name → resource key_pair.key_name if key_pair else None, depends_on=global_dependencies ) + pulumi.log.info("✅ EC2 实例已创建") else: pulumi.log.warn("⏭️ 跳过 EC2 实例部署") diff --git a/iac_modules/pulumi/modules/ec2/ec2_instance.py b/iac_modules/pulumi/modules/ec2/ec2_instance.py index 8675636e..5c9c988d 100644 --- a/iac_modules/pulumi/modules/ec2/ec2_instance.py +++ b/iac_modules/pulumi/modules/ec2/ec2_instance.py @@ -3,7 +3,7 @@ import pulumi import pulumi_aws as aws from .utils import resolve_ami -def create_instances(instances_config, subnets_dict, sg_resource, key_name, depends_on=None): +def create_instances(instances_config, subnets_dict, sg_map: dict, key_name, depends_on=None): outputs = {} for instance_cfg in instances_config: @@ -12,14 +12,11 @@ def create_instances(instances_config, subnets_dict, sg_resource, key_name, depe subnet = subnets_dict[subnet_name] subnet_id = subnet.id - # ✅ 自动解析 AMI(关键词或 AMI ID) region = aws.config.region ami = resolve_ami(instance_cfg["ami"], region) - instance_type = instance_cfg["type"] disk_size = instance_cfg["disk_size_gb"] - # ✅ 可选字段解析 lifecycle = instance_cfg.get("lifecycle", "ondemand") ttl = instance_cfg.get("ttl", "none") env = instance_cfg.get("env", "dev") @@ -28,7 +25,7 @@ def create_instances(instances_config, subnets_dict, sg_resource, key_name, depe private_ip 
= instance_cfg.get("private_ip", None) associate_public_ip = instance_cfg.get("associate_public_ip", True) - # ✅ User data 读取(可选) + # ✅ User data user_data = None if user_data_path: expanded_path = os.path.expanduser(user_data_path) @@ -38,7 +35,6 @@ def create_instances(instances_config, subnets_dict, sg_resource, key_name, depe else: pulumi.log.warn(f"⚠️ user_data 文件不存在: {expanded_path}") - # ✅ 标签定义 tags = { "Name": name, "Lifecycle": lifecycle, @@ -58,14 +54,24 @@ def create_instances(instances_config, subnets_dict, sg_resource, key_name, depe ) ) - # ✅ 构建依赖项(必须是 Resource 对象) + # ✅ Resolve security group ids by name; unknown names are logged and skipped + sg_names = instance_cfg.get("sg_names", []) + security_group_ids = [] + for sg_name in sg_names: + sg = sg_map.get(sg_name) + if sg: + security_group_ids.append(sg.id) + else: + pulumi.log.warn(f"⚠️ 实例 '{name}' 引用的 SG '{sg_name}' 未找到,已跳过") + + # ✅ Build depends_on from the SG resources themselves (depends_on needs Resource objects, not ids) resource_dependencies = [subnet] - if isinstance(sg_resource, pulumi.Resource): - resource_dependencies.append(sg_resource) + for sg in filter(None, map(sg_map.get, sg_names)): + resource_dependencies.append(sg) if depends_on: resource_dependencies.extend(depends_on) - # ✅ 创建 EC2 实例 + # ✅ Create the instance ec2 = aws.ec2.Instance(name, ami=ami, instance_type=instance_type, @@ -73,7 +79,7 @@ def create_instances(instances_config, subnets_dict, sg_resource, key_name, depe subnet_id=subnet_id, private_ip=private_ip, associate_public_ip_address=associate_public_ip, - vpc_security_group_ids=[sg_resource.id] if sg_resource else [], + vpc_security_group_ids=security_group_ids, user_data=user_data, root_block_device={ "volume_size": disk_size, @@ -84,7 +90,6 @@ def create_instances(instances_config, subnets_dict, sg_resource, key_name, depe opts=pulumi.ResourceOptions(depends_on=resource_dependencies) ) - # ✅ 输出信息收集 outputs[name + "_id"] = ec2.id outputs[name + "_public_ip"] = ec2.public_ip outputs[name + "_private_ip"] = ec2.private_ip diff --git a/iac_modules/pulumi/modules/ec2/utils.py b/iac_modules/pulumi/modules/ec2/utils.py
index 4ae0d2ed..cfbbd266 100644 --- a/iac_modules/pulumi/modules/ec2/utils.py +++ b/iac_modules/pulumi/modules/ec2/utils.py @@ -1,9 +1,28 @@ import pulumi_aws as aws -def resolve_ami(ami_keyword: str, region: str) -> str: - """ - 根据关键词解析 AMI ID。如果已是 AMI ID,则直接返回。 - """ +AMI_MAP = { + "ubuntu-22.04": ("099720109477", "ubuntu/images/hvm-ssd*/ubuntu-*-22.04-*-server-*"), # pin to the server image path so most_recent cannot select another Ubuntu variant + "ubuntu-24.04": ("099720109477", "ubuntu/images/hvm-ssd*/ubuntu-*-24.04-*-server-*"), # same pinning; architecture is chosen by the separate filter + "rocky-8.10": ("792107900819", "Rocky-8-ec2-8.10*"), + "amazonlinux-2": ("137112412989", "amzn2-ami-hvm-*-gp2"), + "amazonlinux-2023": ("137112412989", "al2023-ami-2023*"), # no arch suffix: the architecture filter selects x86_64 vs arm64 + "debian-12": ("136693071363", "debian-12-*"), + "almalinux-9": ("151447241410", "AlmaLinux-9-*"), +} + +def query_latest_ami(owner: str, name_filter: str, architecture: str = "x86_64") -> str: + result = aws.ec2.get_ami( + most_recent=True, + owners=[owner], + filters=[ + {"name": "name", "values": [name_filter]}, + {"name": "architecture", "values": [architecture]}, + {"name": "virtualization-type", "values": ["hvm"]}, + ], + ) + return result.id + +def resolve_ami(ami_keyword: str, region: str, architecture: str = "x86_64") -> str: if not aws.config.region: raise ValueError("❌ AWS region is not set. 
Please set aws.config.region before calling resolve_ami") @@ -11,27 +30,14 @@ def resolve_ami(ami_keyword: str, region: str) -> str: return ami_keyword keyword = ami_keyword.lower() + print(f"🔍 Resolving AMI for keyword='{keyword}' in region='{region}' with arch='{architecture}'") - if keyword in ["ubuntu-22.04", "ubuntu22.04"]: - result = aws.ec2.get_ami( - most_recent=True, - owners=["099720109477"], # Canonical - filters=[ - {"name": "name", "values": ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"]}, - {"name": "virtualization-type", "values": ["hvm"]}, - ], - ) - return result.id + if keyword in AMI_MAP: + owner, name_filter = AMI_MAP[keyword] + try: + return query_latest_ami(owner, name_filter, architecture) + except Exception as e: + raise ValueError(f"❌ Failed to find AMI for '{keyword}' in region '{region}': {e}") from e # chain so the underlying lookup error stays in the traceback - if keyword in ["rocky-8.10", "rockylinux-8.10", "rocky8.10"]: - result = aws.ec2.get_ami( - most_recent=True, - owners=["792107900819"], # Rocky Linux - filters=[ - {"name": "name", "values": ["Rocky-8-ec2-8.10*x86_64"]}, - {"name": "architecture", "values": ["x86_64"]}, - ], - ) - return result.id + raise ValueError(f"❌ Unsupported AMI keyword: {ami_keyword}. 
Supported keywords: {list(AMI_MAP.keys())}") - raise ValueError(f"❌ Unsupported AMI keyword: {ami_keyword}") diff --git a/iac_modules/pulumi/modules/security_group/sg.py b/iac_modules/pulumi/modules/security_group/sg.py index 0dbe290a..1f7099b2 100644 --- a/iac_modules/pulumi/modules/security_group/sg.py +++ b/iac_modules/pulumi/modules/security_group/sg.py @@ -3,7 +3,7 @@ from pulumi_aws.ec2 import SecurityGroup, SecurityGroupIngressArgs, SecurityGrou def create_security_group(vpc_id: str, rule_config: dict) -> SecurityGroup: """ - 创建 Security Group,支持 ingress/egress 配置 + 创建 Security Group,支持 ingress/egress 配置,包括 TCP, UDP, ICMP :param vpc_id: 目标 VPC ID :param rule_config: 单个 firewall_rules 的字典配置 :return: 创建的 SecurityGroup 资源对象 @@ -14,16 +14,28 @@ def create_security_group(vpc_id: str, rule_config: dict) -> SecurityGroup: egress_ranges = rule_config.get("egress_ranges", ["0.0.0.0/0"]) for allow_rule in rule_config.get("allow", []): - protocol = allow_rule.get("protocol", "tcp") + protocol = allow_rule.get("protocol", "tcp").lower() + ports = allow_rule.get("ports", []) - for port in allow_rule.get("ports", []): - if isinstance(port, str) and port in ["*", "any", "all"]: - from_port = 0 - to_port = 65535 + # ICMP 无需端口处理 + if protocol == "icmp": + ingress_rules.append( + SecurityGroupIngressArgs( + protocol="icmp", + from_port=-1, + to_port=-1, + cidr_blocks=source_ranges + ) + ) + continue + + # 处理 TCP/UDP 等需要端口的协议 + for port in ports: + if isinstance(port, str) and port.lower() in ["*", "any", "all"]: + from_port, to_port = 0, 65535 else: port = int(port) - from_port = port - to_port = port + from_port = to_port = port ingress_rules.append( SecurityGroupIngressArgs( @@ -34,6 +46,7 @@ def create_security_group(vpc_id: str, rule_config: dict) -> SecurityGroup: ) ) + # 创建 Security Group sg = aws.ec2.SecurityGroup( rule_config.get("name", "default-sg"), vpc_id=vpc_id, @@ -51,4 +64,3 @@ def create_security_group(vpc_id: str, rule_config: dict) -> SecurityGroup: ) return 
sg -