I have created an AKS cluster using Terraform, converted from an existing ARM template, but I see different behavior between the two. The cluster created via the ARM template is stable with all pods running, while the one created using Terraform is never stable.
Here is the Terraform manifest:
########################
# Data sources section
########################

data "azurerm_resource_group" "resource_group_name" {
  name = var.resource_group_name
}

data "azuread_service_principal" "aks_client_spn" {
  display_name = var.aks_client_spn_name
}

data "azuread_service_principal" "aks_server_spn" {
  display_name = var.aks_server_spn_name
}

data "azurerm_key_vault" "keyvault" {
  name                = var.keyvault_name
  resource_group_name = var.resource_group_name
}

data "azurerm_key_vault_secret" "server_app_secret" {
  key_vault_id = data.azurerm_key_vault.keyvault.id
  name         = "${var.aks_server_spn_name}-password"
}

data "azurerm_key_vault_secret" "client_secret" {
  key_vault_id = data.azurerm_key_vault.keyvault.id
  name         = "${var.aks_client_spn_name}-password"
}

data "azurerm_subnet" "aks_subnet" {
  name                 = var.aks_cluster_subnet
  resource_group_name  = var.vnet_resource_group
  virtual_network_name = var.vnet_name
}
#####################
# Resources section
#####################

resource "azurerm_kubernetes_cluster" "aks" {
  name                = var.kubernetes_cluster_name
  location            = data.azurerm_resource_group.resource_group_name.location
  resource_group_name = var.resource_group_name
  dns_prefix          = var.kubernetes_cluster_name
  kubernetes_version  = var.kubernetes_version
  node_resource_group = "mcp01-aks-${var.kubernetes_cluster_name}-d-rg"

  tags = {
    FSCPProduct = var.product_tag
  }

  default_node_pool {
    name                 = "nodepool1"
    orchestrator_version = var.kubernetes_version
    node_count           = var.default_node_pool.node_count
    vm_size              = var.default_node_pool.vm_size
    type                 = "VirtualMachineScaleSets"
    // os_disk_size_gb   = var.default_node_pool.os_disk_size_gb
    availability_zones   = var.default_node_pool.zones
    vnet_subnet_id       = data.azurerm_subnet.aks_subnet.id
    max_pods             = var.default_node_pool.max_pods
  }

  role_based_access_control {
    enabled = true

    azure_active_directory {
      client_app_id     = data.azuread_service_principal.aks_client_spn.application_id
      server_app_id     = data.azuread_service_principal.aks_server_spn.application_id
      server_app_secret = data.azurerm_key_vault_secret.server_app_secret.value
    }
  }

  service_principal {
    client_id     = data.azuread_service_principal.aks_server_spn.application_id
    client_secret = data.azurerm_key_vault_secret.server_app_secret.value
  }

  addon_profile {
    oms_agent {
      enabled                    = true
      log_analytics_workspace_id = var.log_analytics_workspace_id
    }

    http_application_routing {
      enabled = var.http_application_routing
    }
  }

  network_profile {
    load_balancer_sku  = "standard"
    network_plugin     = "azure"
    dns_service_ip     = "10.236.0.10"
    docker_bridge_cidr = "10.237.0.1/16"
    service_cidr       = "10.236.0.0/16"
  }
}
resource "azurerm_monitor_diagnostic_setting" "logananalytics" {
  name                       = "${var.kubernetes_cluster_name}-diagnostic"
  target_resource_id         = azurerm_kubernetes_cluster.aks.id
  log_analytics_workspace_id = var.log_analytics_workspace_id

  metric {
    category = "AllMetrics"
    enabled  = var.diagnostics_enabled

    retention_policy {
      enabled = var.diagnostics_retention_enabled
      days    = var.diagnostics_retention_days
    }
  }

  log {
    category = "kube-apiserver"
    enabled  = var.diagnostics_enabled

    retention_policy {
      enabled = var.diagnostics_retention_enabled
      days    = var.diagnostics_retention_days
    }
  }

  log {
    category = "kube-controller-manager"
    enabled  = var.diagnostics_enabled

    retention_policy {
      enabled = var.diagnostics_retention_enabled
      days    = var.diagnostics_retention_days
    }
  }

  log {
    category = "kube-scheduler"
    enabled  = var.diagnostics_enabled

    retention_policy {
      enabled = var.diagnostics_retention_enabled
      days    = var.diagnostics_retention_days
    }
  }

  log {
    category = "kube-audit"
    enabled  = var.diagnostics_enabled

    retention_policy {
      enabled = var.diagnostics_retention_enabled
      days    = var.diagnostics_retention_days
    }
  }

  log {
    category = "cluster-autoscaler"
    enabled  = var.diagnostics_enabled

    retention_policy {
      enabled = var.diagnostics_retention_enabled
      days    = var.diagnostics_retention_days
    }
  }
}
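The variable definitions are not included here. From the way they are referenced above, the default_node_pool input would be roughly an object of this shape (a sketch only, inferred from usage rather than the actual definition):

variable "default_node_pool" {
  # Hypothetical shape inferred from the references in the manifest above
  # (node_count, vm_size, zones, max_pods and the commented-out os_disk_size_gb).
  type = object({
    node_count      = number
    vm_size         = string
    os_disk_size_gb = number
    zones           = list(string)
    max_pods        = number
  })
}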
Here is the ARM template:
"resources": [
  {
    "type": "Microsoft.ContainerService/managedClusters",
    "name": "[parameters('kubernetesClusterName')]",
    "apiVersion": "2020-02-01",
    "location": "[parameters('location')]",
    "tags": {
      "CBSPProduct": "[variables('productTag')]",
      "CBSP-Azure-KubernetesService-SecretVersion": "[parameters('servicePrincipalSecretVersion')]"
    },
    "properties": {
      "kubernetesVersion": "[parameters('kubernetesVersion')]",
      "nodeResourceGroup": "[parameters('managedResourceGroupName')]",
      "enableRBAC": "[parameters('enableRBAC')]",
      "dnsPrefix": "[parameters('dnsNamePrefix')]",
      "addonProfiles": {
        "httpApplicationRouting": {
          "enabled": "[parameters('enableHttpApplicationRouting')]"
        },
        "omsagent": {
          "enabled": "[parameters('enableContainerMonitoring')]",
          "config": {
            "logAnalyticsWorkspaceResourceId": "[parameters('omsWorkspaceResourceId')]"
          }
        }
      },
      "agentPoolProfiles": [
        {
          "name": "nodepool1",
          "count": "[parameters('nodeCount')]",
          "vmSize": "[parameters('nodeVMSize')]",
          "osDiskSizeGB": "[parameters('nodeOSDiskSizeGB')]",
          "storageProfile": "ManagedDisks",
          "vnetSubnetID": "[variables('vNetSubnetId')]",
          "maxPods": "[parameters('maxPods')]",
          "osType": "[variables('osType')]",
          "type": "VirtualMachineScaleSets",
          "availabilityZones": [
            "1",
            "2",
            "3"
          ]
        }
      ],
      "servicePrincipalProfile": {
        "clientId": "[parameters('servicePrincipalAppId')]",
        "secret": "[parameters('servicePrincipalAppSecret')]"
      },
      "aadProfile": {
        "clientAppID": "[parameters('clientAppId')]",
        "serverAppID": "[parameters('servicePrincipalAppId')]",
        "serverAppSecret": "[parameters('servicePrincipalAppSecret')]"
      },
      "networkProfile": {
        "networkPlugin": "[variables('networkPlugin')]",
        "serviceCidr": "[parameters('serviceCIDR')]",
        "dnsServiceIP": "[parameters('dnsServiceIP')]",
        "dockerBridgeCidr": "[parameters('dockerBridgeCIDR')]",
        "loadBalancerSku": "standard"
      }
    }
  },
  {
    "type": "Microsoft.ContainerService/managedClusters/providers/locks",
    "name": "[concat(parameters('kubernetesClusterName'), '/Microsoft.Authorization/', parameters('kubernetesClusterName'), '-lock')]",
    "apiVersion": "2017-04-01",
    "dependsOn": [
      "[concat('Microsoft.ContainerService/managedClusters/', parameters('kubernetesClusterName'))]"
    ],
    "properties": {
      "level": "CanNotDelete",
      "notes": "AKS cluster should not be deleted manually."
    }
  },
  {
    "type": "Microsoft.ContainerService/managedClusters/providers/diagnosticSettings",
    "name": "[concat(parameters('kubernetesClusterName'), '/Microsoft.Insights/diagnostics')]",
    "apiVersion": "2017-05-01-preview",
    "dependsOn": [
      "[concat('Microsoft.ContainerService/managedClusters/', parameters('kubernetesClusterName'))]"
    ],
    "properties": {
      "workspaceId": "[parameters('omsWorkspaceResourceID')]",
      "metrics": [
        {
          "category": "AllMetrics",
          "enabled": "[variables('diagnosticsEnabled')]",
          "retentionPolicy": {
            "enabled": "[variables('diagnosticsRetentionEnabled')]",
            "days": "[variables('diagnosticsRetentionDays')]"
          }
        }
      ],
      "logs": [
        {
          "category": "kube-apiserver",
          "enabled": "[variables('diagnosticsEnabled')]",
          "retentionPolicy": {
            "enabled": "[variables('diagnosticsRetentionEnabled')]",
            "days": "[variables('diagnosticsRetentionDays')]"
          }
        },
        {
          "category": "kube-controller-manager",
          "enabled": "[variables('diagnosticsEnabled')]",
          "retentionPolicy": {
            "enabled": "[variables('diagnosticsRetentionEnabled')]",
            "days": "[variables('diagnosticsRetentionDays')]"
          }
        },
        {
          "category": "kube-scheduler",
          "enabled": "[variables('diagnosticsEnabled')]",
          "retentionPolicy": {
            "enabled": "[variables('diagnosticsRetentionEnabled')]",
            "days": "[variables('diagnosticsRetentionDays')]"
          }
        },
        {
          "category": "kube-audit",
          "enabled": "[variables('diagnosticsEnabled')]",
          "retentionPolicy": {
            "enabled": "[variables('diagnosticsRetentionEnabled')]",
            "days": "[variables('diagnosticsRetentionDays')]"
          }
        },
        {
          "category": "cluster-autoscaler",
          "enabled": "[variables('diagnosticsEnabled')]",
          "retentionPolicy": {
            "enabled": "[variables('diagnosticsRetentionEnabled')]",
            "days": "[variables('diagnosticsRetentionDays')]"
          }
        }
      ]
    }
  }
]
Terraform (and AzureRM Provider) Version
Terraform version: 0.12.26
AzureRM provider version: 2.28.0
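The provider pinning is not shown in the original configuration; under Terraform 0.12 it would presumably look something like this (an assumed sketch, not the actual configuration used):

terraform {
  required_version = "= 0.12.26"

  # Terraform 0.12 style version constraint (no provider source addresses yet).
  required_providers {
    azurerm = "= 2.28.0"
    # the azuread provider is also used by the data sources above,
    # but its version is not stated in this report
  }
}

provider "azurerm" {
  # The empty features block is required by azurerm provider 2.x.
  features {}
}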
Error
I0924 07:56:43.995899 1 main.go:60] starting nmi process. Version: 1.6.2. Build date: 2020-07-24-20:26.
E0924 07:56:44.002567 1 reflector.go:153] pkg/mod/k8s.io/client-go@v0.17.2/tools/cache/reflector.go:105: Failed to list *v1.AzurePodIdentityException: Get "https://10.236.0.1:443/apis/aadpodidentity.k8s.io/v1/azurepodidentityexceptions?limit=500&resourceVersion=0": EOF