diff --git a/discovery/aws/ec2.go b/discovery/aws/ec2.go index d2ad9736a..0bcfd0547 100644 --- a/discovery/aws/ec2.go +++ b/discovery/aws/ec2.go @@ -194,24 +194,16 @@ func (d *EC2Discovery) ec2Client(ctx context.Context) (*ec2.EC2, error) { return d.ec2, nil } -func (d *EC2Discovery) azID(ctx context.Context, az string) (string, error) { - if azID, ok := d.azToAZID[az]; ok { - return azID, nil - } - +func (d *EC2Discovery) refreshAZIDs(ctx context.Context) error { azs, err := d.ec2.DescribeAvailabilityZonesWithContext(ctx, &ec2.DescribeAvailabilityZonesInput{}) if err != nil { - return "", errors.Wrap(err, "could not describe availability zones") + return err } d.azToAZID = make(map[string]string, len(azs.AvailabilityZones)) for _, az := range azs.AvailabilityZones { d.azToAZID[*az.ZoneName] = *az.ZoneId } - - if azID, ok := d.azToAZID[az]; ok { - return azID, nil - } - return "", fmt.Errorf("no availability zone ID mapping found for %s", az) + return nil } func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { @@ -232,21 +224,23 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error }) } - input := &ec2.DescribeInstancesInput{Filters: filters} + // Only refresh the AZ ID map if we have never been able to build one. + // Prometheus requires a reload if AWS adds a new AZ to the region. + if d.azToAZID == nil { + if err := d.refreshAZIDs(ctx); err != nil { + level.Debug(d.logger).Log( + "msg", "Unable to describe availability zones", + "err", err) + } + } + input := &ec2.DescribeInstancesInput{Filters: filters} if err := ec2Client.DescribeInstancesPagesWithContext(ctx, input, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool { for _, r := range p.Reservations { for _, inst := range r.Instances { if inst.PrivateIpAddress == nil { continue } - azID, err := d.azID(ctx, *inst.Placement.AvailabilityZone) - if err != nil { - level.Warn(d.logger).Log( - "msg", "Unable to determine availability zone ID", - "az", *inst.Placement.AvailabilityZone, - "err", err) - } labels := model.LabelSet{ ec2LabelInstanceID: model.LabelValue(*inst.InstanceId), @@ -271,9 +265,14 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error labels[ec2LabelPublicIP] = model.LabelValue(*inst.PublicIpAddress) labels[ec2LabelPublicDNS] = model.LabelValue(*inst.PublicDnsName) } - labels[ec2LabelAMI] = model.LabelValue(*inst.ImageId) labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone) + azID, ok := d.azToAZID[*inst.Placement.AvailabilityZone] + if !ok && d.azToAZID != nil { + level.Debug(d.logger).Log( + "msg", "Availability zone ID not found", + "az", *inst.Placement.AvailabilityZone) + } labels[ec2LabelAZID] = model.LabelValue(azID) labels[ec2LabelInstanceState] = model.LabelValue(*inst.State.Name) labels[ec2LabelInstanceType] = model.LabelValue(*inst.InstanceType) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index e48ba6a0b..d5dd52d05 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -898,7 +898,7 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_ec2_ami`: the EC2 Amazon Machine Image * `__meta_ec2_architecture`: the architecture of the instance * `__meta_ec2_availability_zone`: the availability zone in which the instance is running -* `__meta_ec2_availability_zone_id`: the [availability zone ID](https://docs.aws.amazon.com/ram/latest/userguide/working-with-az-ids.html) in which the instance is running +* `__meta_ec2_availability_zone_id`: the [availability zone ID](https://docs.aws.amazon.com/ram/latest/userguide/working-with-az-ids.html) in which the instance is running (requires `ec2:DescribeAvailabilityZones`) * `__meta_ec2_instance_id`: the EC2 instance ID * `__meta_ec2_instance_lifecycle`: the lifecycle of the EC2 instance, set only for 'spot' or 'scheduled' instances, absent otherwise * `__meta_ec2_instance_state`: the state of the EC2 instance