Saturday, April 11, 2020

EKS security


1. Limit access to the cluster API server

Use the AWS Console or the following command:

aws eks update-cluster-config \
    --region us-east-1 \
    --name co-ec-eks-cluster-vpc-05b52a0ba174eeeee \
    --resources-vpc-config endpointPublicAccess=true,publicAccessCidrs="19.19.19.19/32",endpointPrivateAccess=true

Use the Console (EKS Networking section) or the following command to verify the change:
aws eks describe-cluster --name co-ec-eks-cluster-vpc-05b52a0ba174eeeee --region us-east-1
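
To check just the endpoint settings, the describe-cluster output can also be filtered with a JMESPath query (a quick sketch; same sample cluster name as above):

aws eks describe-cluster \
    --name co-ec-eks-cluster-vpc-05b52a0ba174eeeee \
    --region us-east-1 \
    --query 'cluster.resourcesVpcConfig.{public:endpointPublicAccess,private:endpointPrivateAccess,cidrs:publicAccessCidrs}'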

We can also do this from Terraform. Sample code:

resource "aws_eks_cluster" "co-ec-eks-cluster" {
 
name     = local.eks_cluster_name
  role_arn
= aws_iam_role.co-ec-eks-cluster-iam-role.arn

 
vpc_config {
   
security_group_ids      = [aws_security_group.co-ec-eks-cluster-security-group.id]
   
subnet_ids              = local.subnet_ids
    endpoint_private_access
= true // allow access to EKS network
    // https://www.cloudflare.com/ips-v4 for list of IPs from Cloudflare
   
public_access_cidrs = toset(concat(data.cloudflare_ip_ranges.cloudflare.ipv4_cidr_blocks, local.workstation-external-cidr))
  }

 
depends_on = [
    aws_iam_role_policy_attachment.co-ec-eks-cluster-AmazonEKSClusterPolicy,
    aws_iam_role_policy_attachment.co-ec-eks-cluster-AmazonEKSServicePolicy,
  ]
}


2. Control access for the EKS cluster nodes (so the services running on them will have access to the resources they need) through Terraform:



resource "aws_iam_role" "fs-ec-eks-node-iam-role" {
  name = "fs-ec-eks-node-iam-role-${local.vpc_id}"
  assume_role_policy = <<POLICY
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "ec2.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
POLICY
}

resource "aws_iam_role_policy" "fs-ec-eks-node-auto-scale-policy" {
  name = "fs-ec-eks-node-auto-scale-policy"  role = aws_iam_role.fs-ec-eks-node-iam-role.id
  policy = <<-EOF
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "autoscaling:DescribeAutoScalingGroups",
                "autoscaling:DescribeAutoScalingInstances",
                "autoscaling:DescribeLaunchConfigurations",
                "autoscaling:DescribeTags",
                "autoscaling:SetDesiredCapacity",
                "autoscaling:TerminateInstanceInAutoScalingGroup"
            ],
            "Resource": "*"
        }
    ]
}
EOF
}

resource "aws_iam_role_policy" "fs-ec-eks-node-metrics-access-policy" {
  name = "fs-ec-eks-node-metrics-access-policy"  role = aws_iam_role.fs-ec-eks-node-iam-role.id
  policy = <<-EOF
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "cloudwatch:GetMetricData",
        "cloudwatch:GetMetricStatistics",
        "cloudwatch:ListMetrics"
      ],
      "Resource": "*"
    }
  ]
}
EOF
}

resource "aws_iam_role_policy_attachment" "fs-ec-eks-node-AmazonEKSWorkerNodePolicy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"  role       = aws_iam_role.fs-ec-eks-node-iam-role.name}

resource "aws_iam_role_policy_attachment" "fs-ec-eks-node-AmazonEKS_CNI_Policy" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"  role       = aws_iam_role.fs-ec-eks-node-iam-role.name}

resource "aws_iam_role_policy_attachment" "fs-ec-eks-node-AmazonEC2ContainerRegistryReadOnly" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"  role       = aws_iam_role.fs-ec-eks-node-iam-role.name}

resource "aws_iam_role_policy_attachment" "fs-ec-eks-node-CloudWatchAgentServerPolicy" {
  policy_arn = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"  role       = aws_iam_role.fs-ec-eks-node-iam-role.name}

resource "aws_iam_role_policy_attachment" "fs-ec-eks-node-AmazonDynamoDBFullAccess" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonDynamoDBFullAccess"  role       = aws_iam_role.fs-ec-eks-node-iam-role.name}


# Using the new feature from re:Invent 2019 to provision nodes automatically without the need
# for EC2 provisioning. EKS-optimized AMIs will be used automatically for each node.
# Nodes launched as part of a managed node group are automatically tagged for auto-discovery
# by the k8s cluster autoscaler.
# https://docs.aws.amazon.com/eks/latest/userguide/managed-node-groups.html
# https://www.terraform.io/docs/providers/aws/r/eks_node_group.html
resource "aws_eks_node_group" "fs-ec-eks-node-group" {
  cluster_name    = aws_eks_cluster.fs-ec-eks-cluster.name
  node_group_name = "fs-ec-eks-node-group-${local.vpc_id}"
  node_role_arn   = aws_iam_role.fs-ec-eks-node-iam-role.arn
  subnet_ids      = local.subnet_ids
  instance_types  = [var.instance_type]

  scaling_config {
    desired_size = 3
    max_size     = 8 // TODO
    min_size     = 3
  }

  depends_on = [
    aws_iam_role_policy_attachment.fs-ec-eks-node-AmazonEKSWorkerNodePolicy,
    aws_iam_role_policy_attachment.fs-ec-eks-node-AmazonEKS_CNI_Policy,
    aws_iam_role_policy_attachment.fs-ec-eks-node-AmazonEC2ContainerRegistryReadOnly,
    aws_iam_role_policy_attachment.fs-ec-eks-node-CloudWatchAgentServerPolicy,
    aws_iam_role_policy_attachment.fs-ec-eks-node-AmazonDynamoDBFullAccess,
  ]
}
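
Once applied, the node group and the IAM role attached to it can be verified from the CLI (a sketch; the node group name follows the sample code above, and the cluster name and VPC id must be filled in):

aws eks describe-nodegroup \
    --cluster-name <cluster name> \
    --nodegroup-name fs-ec-eks-node-group-<vpc id> \
    --query 'nodegroup.{status:status,role:nodeRole,scaling:scalingConfig}'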


3. Control access to MSK (Kafka):

resource "aws_security_group" "fs-ec-msk-cluster-security-group" {
  name        = "fs-ec-msk-cluster-security-group-${local.vpc_id}"  description = "Cluster communication with worker nodes"  vpc_id      = local.vpc_id
  egress {
    from_port   = 0    to_port     = 0    protocol    = "-1"    cidr_blocks = ["0.0.0.0/0"]
  }

  tags = {
    Name = "fs-ec-msk-cluster-${local.vpc_id}"  }
}

# allow access from every host in the same vpc. // TODO
resource "aws_security_group_rule" "fs-ec-msk-cluster-ingress-workstation-http" {
  cidr_blocks       = [local.vpc_cidr]
  description       = "Allow access to Kafka from same VPC"
  from_port         = 9092
  to_port           = 9092
  protocol          = "tcp"
  security_group_id = aws_security_group.fs-ec-msk-cluster-security-group.id
  type              = "ingress"
}

resource "aws_security_group_rule" "fs-ec-msk-cluster-ingress-workstation-https" {
  cidr_blocks       = [local.vpc_cidr]
  description       = "Allow access to Kafka from same VPC"
  from_port         = 9094
  to_port           = 9094
  protocol          = "tcp"
  security_group_id = aws_security_group.fs-ec-msk-cluster-security-group.id
  type              = "ingress"
}

resource "aws_security_group_rule" "fs-ec-msk-cluster-ingress-workstation-zookeeper" {
  cidr_blocks       = [local.vpc_cidr]
  description       = "Allow access to Zookeeper from same VPC"
  from_port         = 2181
  to_port           = 2181
  protocol          = "tcp"
  security_group_id = aws_security_group.fs-ec-msk-cluster-security-group.id
  type              = "ingress"
}

resource "aws_kms_key" "fs-ec-kms" {
  description = "KMS key"}

resource "aws_msk_cluster" "fs-ec-msk-cluster" {
  cluster_name           = "fs-ec-msk-cluster-${local.vpc_id}"  kafka_version          = var.kafka_version  number_of_broker_nodes = length(local.subnets_ids)

  configuration_info {
    arn      = aws_msk_configuration.fs-ec-msk-configuration.arn    revision = aws_msk_configuration.fs-ec-msk-configuration.latest_revision  }

  broker_node_group_info {
    instance_type   = var.broker_type    ebs_volume_size = var.broker_ebs_size    client_subnets  = local.subnets_ids
    security_groups = [aws_security_group.fs-ec-msk-cluster-security-group.id]
  }

  encryption_info {
    encryption_at_rest_kms_key_arn = aws_kms_key.fs-ec-kms.arn    encryption_in_transit {
      client_broker = "TLS" // PLAINTEXT"        in_cluster = true    }
  }

  tags = {
    Name = "fs-ec-msk-cluster-${local.vpc_id}"  }
}

// it is not possible to destroy cluster configs so a random number is usedresource "random_id" "msk" {
  byte_length = 4}

resource "aws_msk_configuration" "fs-ec-msk-configuration" {
  kafka_versions = [var.kafka_version]
  name           = "${var.msk_config_name_prefix}fs-ec-msk-configuration-${local.vpc_id}-${random_id.msk.hex}"
  server_properties = <<PROPERTIES
auto.create.topics.enable = true
delete.topic.enable = false
num.partitions = 96
PROPERTIES
}
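
Once the MSK cluster is up, clients need the broker and Zookeeper endpoints. Both can be read back with the CLI (a sketch; substitute the cluster ARN from the Terraform output or the Console):

aws kafka get-bootstrap-brokers --cluster-arn <cluster arn> --region us-east-1
aws kafka describe-cluster --cluster-arn <cluster arn> --region us-east-1 --query 'ClusterInfo.ZookeeperConnectString'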




EKS cluster autoscaler


1. Enable CA in eks-worker-nodes.tf

# Using the new feature from re:Invent 2019 to provision nodes automatically without the need
# for EC2 provisioning. EKS-optimized AMIs will be used automatically for each node.
# Nodes launched as part of a managed node group are automatically tagged for auto-discovery
# by the k8s cluster autoscaler.
# https://docs.aws.amazon.com/eks/latest/userguide/managed-node-groups.html
# https://www.terraform.io/docs/providers/aws/r/eks_node_group.html
resource "aws_eks_node_group" "co-ec-eks-node-group" {
  cluster_name    = aws_eks_cluster.co-ec-eks-cluster.name
  node_group_name = "co-ec-eks-node-group-${local.vpc_id}"
  node_role_arn   = aws_iam_role.co-ec-eks-node-iam-role.arn
  subnet_ids      = local.subnet_ids
  instance_types  = [var.instance_type]

  scaling_config {
    desired_size = 3
    max_size     = 8
    min_size     = 3
  }

  depends_on = [
    aws_iam_role_policy_attachment.co-ec-eks-node-AmazonEKSWorkerNodePolicy,
    aws_iam_role_policy_attachment.co-ec-eks-node-AmazonEKS_CNI_Policy,
    aws_iam_role_policy_attachment.co-ec-eks-node-AmazonEC2ContainerRegistryReadOnly,
    aws_iam_role_policy_attachment.co-ec-eks-node-CloudWatchAgentServerPolicy,
    aws_iam_role_policy_attachment.co-ec-eks-node-AmazonDynamoDBFullAccess,
  ]
}


2. Add a new IAM policy to the EKS node IAM role

resource "aws_iam_role_policy" "co-ec-eks-node-auto-scale-policy" {
  name = "co-ec-eks-node-auto-scale-policy"   
  role = aws_iam_role.co-ec-eks-node-iam-role.id
  policy = <<-EOF
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "autoscaling:DescribeAutoScalingGroups",
                "autoscaling:DescribeAutoScalingInstances",
                "autoscaling:DescribeLaunchConfigurations",
                "autoscaling:DescribeTags",
                "autoscaling:SetDesiredCapacity",
                "autoscaling:TerminateInstanceInAutoScalingGroup"
            ],
            "Resource": "*"
        }
    ]
}
EOF
}

(You can also add this policy to the node IAM role from the AWS Console.)


3. Download the sample file from the page below and rename it to cluster-autoscaler-asg.yaml:

https://aws.amazon.com/premiumsupport/knowledge-center/eks-cluster-autoscaler-setup/

4. Make the following changes to the yaml file (diff format: the '<' lines are replaced by the '>' line):

<             - --expander=least-waste
<             - --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/<YOUR CLUSTER NAME>
---
>             - --nodes={{MIN_NODE}}:{{MAX_NODE}}:{{K8S_NODE_ASG}}

One example is:


    - --nodes=1:10:eks-22b82878-2fa8-201b-8842-8f7f1aeeeee
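
If the ASG name is not handy, it can be looked up through the tags that managed node groups apply to their auto scaling groups (a sketch assuming the eks:cluster-name tag; fill in your cluster name):

export K8S_NODE_ASG=$(aws autoscaling describe-auto-scaling-groups \
    --query "AutoScalingGroups[?Tags[?Key=='eks:cluster-name' && Value=='<YOUR CLUSTER NAME>']].AutoScalingGroupName" \
    --output text)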



5. Export K8S_NODE_ASG, MIN_NODE, and MAX_NODE, then apply the autoscaler deployment:

cat cluster-autoscaler-asg.yaml | sed "s/{{K8S_NODE_ASG}}/$K8S_NODE_ASG/;s/{{MIN_NODE}}/$MIN_NODE/;s/{{MAX_NODE}}/$MAX_NODE/" | kubectl apply -f -
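
To confirm the autoscaler is running (the sample file deploys it as a Deployment named cluster-autoscaler in kube-system; adjust the name if yours differs):

kubectl get deployment cluster-autoscaler -n kube-system
kubectl logs -f deployment/cluster-autoscaler -n kube-system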


References

https://aws.amazon.com/premiumsupport/knowledge-center/eks-cluster-autoscaler-setup/


Saturday, December 21, 2019

EKS access from kubectl



1. Access EKS when using AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY

(for any user who has the key and secret, it is easy)

$ export AWS_ACCESS_KEY_ID=
$ export AWS_SECRET_ACCESS_KEY=
$ export KUBECONFIG=~/.kube/config...
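
If the kubeconfig entry has not been generated yet, it can be created with the AWS CLI (assuming a CLI version that includes the eks subcommands):

$ aws eks update-kubeconfig --name {eks_cluster_name} --region us-east-1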


2. Access EKS without AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY

(for user 'xyz' who has no aws key and secret, do this)

2.1. Ask the admin to update aws-auth-cm.yaml to add the 'xyz' user to the aws-auth configmap:

  mapUsers: |
    - userarn: arn:aws:iam::226347999999:user/xyz
      username: xyz
      groups:
        - system:masters

2.2. Ask admin to run: kubectl apply -f aws-auth-cm.yaml

2.3. In the command-line window of user 'xyz', run:

$ export KUBECONFIG=~/.kube/config...

2.4. Now user 'xyz' should be able to run:

$ kubectl get pod


** To add an additional role (instead of a user), add this to the configmap file under 'mapRoles':

    - rolearn: arn:aws:iam::853899999999:role/test-role
      username: aws
      groups:
        - system:masters

3. Useful commands

3.1. Get the client caller identity

$ aws sts get-caller-identity

Sample result:

{
    "Account": "226347999999",
    "UserId": "AIDATJM2ZIW2LLLLLLLLL",
    "Arn": "arn:aws:iam::226347999999:user/jzeng"
}

3.2. Find who can access EKS

$ kubectl describe configmap aws-auth -n kube-system

Sample result:

Name:         aws-auth
Namespace:    kube-system
Labels:       <none>
Annotations:  kubectl.kubernetes.io/last-applied-configuration:
                {"apiVersion":"v1","data":{"mapRoles":"- rolearn: arn:aws:iam::853844999999:role/co-ec-eks-node-iam-role-vpc-078a850cd7eeeeeee\n  username...

Data
====
mapUsers:
----
- userarn: arn:aws:iam::226347999999:user/jzeng
  username: jzeng
  groups:
    - system:masters

mapRoles:
----
- rolearn: arn:aws:iam::853844999999:role/co-ec-eks-node-iam-role-vpc-078a850cd7eeeeeee
  username: system:node:{{EC2PrivateDNSName}}
  groups:
    - system:bootstrappers
    - system:nodes

3.3. Get access token

$ aws-iam-authenticator token -i {eks_cluster_name}
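
Newer AWS CLI versions can produce an equivalent token without the separate aws-iam-authenticator binary (an alternative, not what the setup above used):

$ aws eks get-token --cluster-name {eks_cluster_name}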


Reference:

https://aws.amazon.com/premiumsupport/knowledge-center/amazon-eks-cluster-access/


Sunday, December 1, 2019

Create secrets for Terraform code to access Docker images from a different AWS account


Set up a secret for accessing ECR and loading the Docker image:

  • Copy the Python snippet below into generate_secret_key.py.
#!/usr/bin/env python
 
import re
import subprocess
 
def execute_cmd(cmd):
  # universal_newlines=True makes communicate() return str (not bytes) so it also works on Python 3
  proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
  comm = proc.communicate()
 
  if comm[1] != '':
    print(comm[1].rstrip('\n'))
    exit(-1)
 
  return comm[0]
 
def generate_secret_key():
  login_cmd = execute_cmd('aws ecr get-login').rstrip('\n')
  # strip 'docker login', '-u', '-p' and '-e none' from the output, leaving [user, password, server]
  creds = re.sub(r"(-e none\ |docker login\ |-u\ |-p\ )", '', login_cmd).split(' ')
  generate_secret_cmd = "kubectl create secret docker-registry {0} --docker-username={1} --docker-password={2} --docker-server={3} --docker-email=YOUR_EMAIL_ADDRESS"
  execute_cmd(generate_secret_cmd.format('ecr.us-west-2', creds[0], creds[1], creds[2].replace('https://', '')))
 
if __name__ == "__main__":
  generate_secret_key()
NOTE: Remember to change YOUR_EMAIL_ADDRESS.
  • Change the file permission and execute it.
  • Make sure the right AWS account info is used by running 'aws ecr get-login'. (Use 'export AWS_PROFILE={profile_name_in_.aws_config}' to switch AWS accounts; make sure both the kubeconfig and the AWS profile point to the same AWS account!)

The step above creates a secret called 'ecr.us-west-2' that Terraform can use to access ECR without permission issues. The secret expires, so the script needs to be re-run periodically to regenerate it. Run 'kubectl get secrets' to check whether the 'ecr.us-west-2' secret is still there.
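
Because 'kubectl create secret' fails if the secret already exists, a regeneration pass can delete the old one first (a sketch; generate_secret_key.py is the script above):

kubectl delete secret ecr.us-west-2 --ignore-not-found
python generate_secret_key.py
kubectl get secrets | grep ecr.us-west-2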

Put the following in the Terraform code (for example, inside the pod spec of a kubernetes_deployment resource) to pull the Docker image using the secret:

image_pull_secrets {
  name = "ecr.us-west-2"
}


Set up a secret for accessing ECR from a different AWS account and loading the Docker image:


1. Use 'export AWS_PROFILE={profile-name}' to switch to the account we will deploy the ECR image to.
2. Deploy EKS, DynamoDB, etc.
3. Add permission to the ECR in the source account for each image:

  "Statement": [
    {
      "Sid": "AllowPull",
      "Effect": "Allow",
      "Principal": {
        "AWS": "arn:aws:iam::853844999999:user/terraform-project-development"
      },
      "Action": [
        "ecr:BatchCheckLayerAvailability",
        "ecr:BatchGetImage",
        "ecr:GetDownloadUrlForLayer"
      ]
    }
  ]
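
For example, if the policy above is saved as ecr-pull-policy.json, it can be attached to a repository with the CLI (the repository name my-service is a placeholder):

aws ecr set-repository-policy \
    --repository-name my-service \
    --policy-text file://ecr-pull-policy.json \
    --region us-west-2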


4. Run the following Python code:

LT-2018-6666:dev jzeng$ cat ../../us-east-1/dev-test/generate_secret_key.py
#!/usr/bin/env python

import re
import subprocess

def execute_cmd(cmd):
  # universal_newlines=True makes communicate() return str (not bytes) so it also works on Python 3
  proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
  comm = proc.communicate()

  if comm[1] != '':
    print(comm[1].rstrip('\n'))
    exit(-1)

  return comm[0]

def generate_secret_key():
  login_cmd = execute_cmd('aws ecr get-login --registry-ids 226349999999 --region us-west-2').rstrip('\n')
  creds = re.sub(r"(-e none\ |docker login\ |-u\ |-p\ )", '', login_cmd).split(' ')
  generate_secret_cmd = "kubectl create secret docker-registry {0} --docker-username={1} --docker-password={2} --docker-server={3} --docker-email=john.lastname@company.com"
  execute_cmd(generate_secret_cmd.format('ecr.secret.226349999999.us-west-2', creds[0], creds[1], creds[2].replace('https://', '')))

if __name__ == "__main__":
  generate_secret_key()


5. Use "ecr.secret.226349999999.us-west-2" as the secret name in the Terraform code.


Access ECR images from different accounts without a secret


(Used for the DTAP environments.) Use the following JSON to set up permissions on each source ECR repository (the //D, //T, //A, //P markers label the dev/test/acceptance/production accounts and must be removed before use, since JSON does not allow comments):

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Sid": "dev account access",
      "Effect": "Allow",
      "Principal": {
        "AWS": [
          "arn:aws:iam::874429999999:root",  //D
          "arn:aws:iam::853848888888:root",   //T
          "arn:aws:iam::527037777777:root",   //A
          "arn:aws:iam::387656666666:root"   //P
        ]
      },
      "Action": [
        "ecr:BatchCheckLayerAvailability",
        "ecr:BatchGetImage",
        "ecr:CompleteLayerUpload",
        "ecr:DescribeImages",
        "ecr:DescribeRepositories",
        "ecr:GetDownloadUrlForLayer",
        "ecr:GetLifecyclePolicy",
        "ecr:GetLifecyclePolicyPreview",
        "ecr:GetRepositoryPolicy",
        "ecr:InitiateLayerUpload",
        "ecr:ListImages",
        "ecr:PutImage",
        "ecr:PutLifecyclePolicy",
        "ecr:UploadLayerPart"
      ]
    }
  ]
}
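
As a quick check from one of the granted accounts, list the images in a source repository (a sketch; the repository name my-service is a placeholder, and the registry id is the source account's id):

aws ecr list-images --registry-id <source account id> --repository-name my-service --region us-west-2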


