2019-06-18 13:34:46 +00:00
// Copyright 2019 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pagerduty
import (
"bytes"
"context"
"encoding/json"
2023-11-24 21:17:35 +00:00
"errors"
2019-06-18 13:34:46 +00:00
"fmt"
"io"
2024-11-06 09:09:57 +00:00
"log/slog"
2019-06-18 13:34:46 +00:00
"net/http"
2022-10-14 12:55:59 +00:00
"os"
2019-06-18 13:34:46 +00:00
"strings"
2020-05-15 13:15:18 +00:00
"github.com/alecthomas/units"
2019-06-18 13:34:46 +00:00
commoncfg "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/notify"
"github.com/prometheus/alertmanager/template"
"github.com/prometheus/alertmanager/types"
)
2022-11-25 19:47:20 +00:00
// Size limits applied to outgoing PagerDuty events.
const (
	// maxV1DescriptionLenRunes caps the v1 "description" field.
	// https://developer.pagerduty.com/docs/ZG9jOjExMDI5NTc4-send-a-v1-event - 1024 characters or runes.
	maxV1DescriptionLenRunes = 1024

	// maxV2SummaryLenRunes caps the v2 payload "summary" field.
	// https://developer.pagerduty.com/docs/ZG9jOjExMDI5NTgx-send-an-alert-event - 1024 characters or runes.
	maxV2SummaryLenRunes = 1024

	// maxEventSize is the largest encoded event, in bytes, that is sent
	// as-is; larger events have their details replaced by an error note.
	maxEventSize int = 512_000
)
2020-05-15 13:15:18 +00:00
2019-06-18 13:34:46 +00:00
// Notifier implements a Notifier for PagerDuty notifications.
type Notifier struct {
2019-08-02 14:17:40 +00:00
conf * config . PagerdutyConfig
tmpl * template . Template
2024-11-06 09:09:57 +00:00
logger * slog . Logger
2019-08-02 14:17:40 +00:00
apiV1 string // for tests.
client * http . Client
retrier * notify . Retrier
2019-06-18 13:34:46 +00:00
}
// New returns a new PagerDuty notifier.
2024-11-06 09:09:57 +00:00
func New ( c * config . PagerdutyConfig , t * template . Template , l * slog . Logger , httpOpts ... commoncfg . HTTPClientOption ) ( * Notifier , error ) {
2021-11-10 16:28:47 +00:00
client , err := commoncfg . NewClientFromConfig ( * c . HTTPConfig , "pagerduty" , httpOpts ... )
2019-06-18 13:34:46 +00:00
if err != nil {
return nil , err
}
n := & Notifier { conf : c , tmpl : t , logger : l , client : client }
2022-10-14 12:55:59 +00:00
if c . ServiceKey != "" || c . ServiceKeyFile != "" {
2019-06-18 13:34:46 +00:00
n . apiV1 = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"
2019-08-02 14:17:40 +00:00
// Retrying can solve the issue on 403 (rate limiting) and 5xx response codes.
// https://v2.developer.pagerduty.com/docs/trigger-events
n . retrier = & notify . Retrier { RetryCodes : [ ] int { http . StatusForbidden } , CustomDetailsFunc : errDetails }
} else {
// Retrying can solve the issue on 429 (rate limiting) and 5xx response codes.
// https://v2.developer.pagerduty.com/docs/events-api-v2#api-response-codes--retry-logic
n . retrier = & notify . Retrier { RetryCodes : [ ] int { http . StatusTooManyRequests } , CustomDetailsFunc : errDetails }
2019-06-18 13:34:46 +00:00
}
return n , nil
}
// Event types sent to PagerDuty: the v1 path sends them as "event_type",
// the v2 path as "event_action".
const (
	// pagerDutyEventTrigger is used unless the alert group is resolved.
	pagerDutyEventTrigger = "trigger"
	// pagerDutyEventResolve is used when the alert group is resolved.
	pagerDutyEventResolve = "resolve"
)
// pagerDutyMessage is the JSON body posted to PagerDuty. It carries the
// fields of both API versions:
//
//   - notifyV1 fills ServiceKey, EventType, IncidentKey, Description and
//     Details (plus Client and ClientURL for trigger events).
//   - notifyV2 fills RoutingKey, EventAction, DedupKey, Payload, Client,
//     ClientURL, Images and Links.
//
// EventAction and Payload have no omitempty option, so they are always
// present in the encoded output, even when left at their zero value.
type pagerDutyMessage struct {
	RoutingKey  string            `json:"routing_key,omitempty"`
	ServiceKey  string            `json:"service_key,omitempty"`
	DedupKey    string            `json:"dedup_key,omitempty"`
	IncidentKey string            `json:"incident_key,omitempty"`
	EventType   string            `json:"event_type,omitempty"`
	Description string            `json:"description,omitempty"`
	EventAction string            `json:"event_action"`
	Payload     *pagerDutyPayload `json:"payload"`
	Client      string            `json:"client,omitempty"`
	ClientURL   string            `json:"client_url,omitempty"`
	Details     map[string]string `json:"details,omitempty"`
	Images      []pagerDutyImage  `json:"images,omitempty"`
	Links       []pagerDutyLink   `json:"links,omitempty"`
}
// pagerDutyLink is one entry of the "links" array of a v2 event.
// Both fields are always emitted, even when empty.
type pagerDutyLink struct {
	HRef string `json:"href"`
	Text string `json:"text"`
}
// pagerDutyImage is one entry of the "images" array of a v2 event.
// All three fields are always emitted, even when empty.
type pagerDutyImage struct {
	Src  string `json:"src"`
	Alt  string `json:"alt"`
	Href string `json:"href"`
}
// pagerDutyPayload is the "payload" object of a v2 event. Summary, Source
// and Severity are always emitted; the remaining fields are omitted from
// the JSON output when empty.
type pagerDutyPayload struct {
	Summary       string            `json:"summary"`
	Source        string            `json:"source"`
	Severity      string            `json:"severity"`
	Timestamp     string            `json:"timestamp,omitempty"`
	Class         string            `json:"class,omitempty"`
	Component     string            `json:"component,omitempty"`
	Group         string            `json:"group,omitempty"`
	CustomDetails map[string]string `json:"custom_details,omitempty"`
}
2020-05-15 13:15:18 +00:00
func ( n * Notifier ) encodeMessage ( msg * pagerDutyMessage ) ( bytes . Buffer , error ) {
var buf bytes . Buffer
if err := json . NewEncoder ( & buf ) . Encode ( msg ) ; err != nil {
2023-11-24 21:17:35 +00:00
return buf , fmt . Errorf ( "failed to encode PagerDuty message: %w" , err )
2020-05-15 13:15:18 +00:00
}
if buf . Len ( ) > maxEventSize {
truncatedMsg := fmt . Sprintf ( "Custom details have been removed because the original event exceeds the maximum size of %s" , units . MetricBytes ( maxEventSize ) . String ( ) )
if n . apiV1 != "" {
msg . Details = map [ string ] string { "error" : truncatedMsg }
} else {
msg . Payload . CustomDetails = map [ string ] string { "error" : truncatedMsg }
}
warningMsg := fmt . Sprintf ( "Truncated Details because message of size %s exceeds limit %s" , units . MetricBytes ( buf . Len ( ) ) . String ( ) , units . MetricBytes ( maxEventSize ) . String ( ) )
2024-11-06 09:09:57 +00:00
n . logger . Warn ( warningMsg )
2020-05-15 13:15:18 +00:00
buf . Reset ( )
if err := json . NewEncoder ( & buf ) . Encode ( msg ) ; err != nil {
2023-11-24 21:17:35 +00:00
return buf , fmt . Errorf ( "failed to encode PagerDuty message: %w" , err )
2020-05-15 13:15:18 +00:00
}
}
return buf , nil
}
2019-06-18 13:34:46 +00:00
func ( n * Notifier ) notifyV1 (
ctx context . Context ,
eventType string ,
key notify . Key ,
data * template . Data ,
details map [ string ] string ,
as ... * types . Alert ,
) ( bool , error ) {
var tmplErr error
tmpl := notify . TmplText ( n . tmpl , data , & tmplErr )
2022-11-25 19:47:20 +00:00
description , truncated := notify . TruncateInRunes ( tmpl ( n . conf . Description ) , maxV1DescriptionLenRunes )
2019-06-18 13:34:46 +00:00
if truncated {
2024-11-06 09:09:57 +00:00
n . logger . Warn ( "Truncated description" , "key" , key , "max_runes" , maxV1DescriptionLenRunes )
2019-06-18 13:34:46 +00:00
}
2022-10-14 12:55:59 +00:00
serviceKey := string ( n . conf . ServiceKey )
if serviceKey == "" {
content , fileErr := os . ReadFile ( n . conf . ServiceKeyFile )
if fileErr != nil {
2023-11-24 21:17:35 +00:00
return false , fmt . Errorf ( "failed to read service key from file: %w" , fileErr )
2022-10-14 12:55:59 +00:00
}
serviceKey = strings . TrimSpace ( string ( content ) )
}
2019-06-18 13:34:46 +00:00
msg := & pagerDutyMessage {
2022-10-14 12:55:59 +00:00
ServiceKey : tmpl ( serviceKey ) ,
2019-06-18 13:34:46 +00:00
EventType : eventType ,
IncidentKey : key . Hash ( ) ,
Description : description ,
Details : details ,
}
if eventType == pagerDutyEventTrigger {
msg . Client = tmpl ( n . conf . Client )
msg . ClientURL = tmpl ( n . conf . ClientURL )
}
if tmplErr != nil {
2023-11-24 21:17:35 +00:00
return false , fmt . Errorf ( "failed to template PagerDuty v1 message: %w" , tmplErr )
2019-10-29 09:46:40 +00:00
}
// Ensure that the service key isn't empty after templating.
if msg . ServiceKey == "" {
return false , errors . New ( "service key cannot be empty" )
2019-06-18 13:34:46 +00:00
}
2020-05-15 13:15:18 +00:00
encodedMsg , err := n . encodeMessage ( msg )
if err != nil {
return false , err
2019-06-18 13:34:46 +00:00
}
2020-05-15 13:15:18 +00:00
resp , err := notify . PostJSON ( ctx , n . client , n . apiV1 , & encodedMsg )
2019-06-18 13:34:46 +00:00
if err != nil {
2023-11-24 21:17:35 +00:00
return true , fmt . Errorf ( "failed to post message to PagerDuty v1: %w" , err )
2019-06-18 13:34:46 +00:00
}
defer notify . Drain ( resp )
2019-08-02 14:17:40 +00:00
return n . retrier . Check ( resp . StatusCode , resp . Body )
2019-06-18 13:34:46 +00:00
}
func ( n * Notifier ) notifyV2 (
ctx context . Context ,
eventType string ,
key notify . Key ,
data * template . Data ,
details map [ string ] string ,
as ... * types . Alert ,
) ( bool , error ) {
var tmplErr error
tmpl := notify . TmplText ( n . tmpl , data , & tmplErr )
if n . conf . Severity == "" {
n . conf . Severity = "error"
}
2022-11-25 19:47:20 +00:00
summary , truncated := notify . TruncateInRunes ( tmpl ( n . conf . Description ) , maxV2SummaryLenRunes )
2019-06-18 13:34:46 +00:00
if truncated {
2024-11-06 09:09:57 +00:00
n . logger . Warn ( "Truncated summary" , "key" , key , "max_runes" , maxV2SummaryLenRunes )
2019-06-18 13:34:46 +00:00
}
2022-10-14 12:55:59 +00:00
routingKey := string ( n . conf . RoutingKey )
if routingKey == "" {
content , fileErr := os . ReadFile ( n . conf . RoutingKeyFile )
if fileErr != nil {
2023-11-24 21:17:35 +00:00
return false , fmt . Errorf ( "failed to read routing key from file: %w" , fileErr )
2022-10-14 12:55:59 +00:00
}
routingKey = strings . TrimSpace ( string ( content ) )
}
2019-06-18 13:34:46 +00:00
msg := & pagerDutyMessage {
Client : tmpl ( n . conf . Client ) ,
ClientURL : tmpl ( n . conf . ClientURL ) ,
2022-10-14 12:55:59 +00:00
RoutingKey : tmpl ( routingKey ) ,
2019-06-18 13:34:46 +00:00
EventAction : eventType ,
DedupKey : key . Hash ( ) ,
2020-09-25 15:31:22 +00:00
Images : make ( [ ] pagerDutyImage , 0 , len ( n . conf . Images ) ) ,
Links : make ( [ ] pagerDutyLink , 0 , len ( n . conf . Links ) ) ,
2019-06-18 13:34:46 +00:00
Payload : & pagerDutyPayload {
Summary : summary ,
2022-10-13 12:01:46 +00:00
Source : tmpl ( n . conf . Source ) ,
2019-06-18 13:34:46 +00:00
Severity : tmpl ( n . conf . Severity ) ,
CustomDetails : details ,
Class : tmpl ( n . conf . Class ) ,
Component : tmpl ( n . conf . Component ) ,
Group : tmpl ( n . conf . Group ) ,
} ,
}
2020-09-25 15:31:22 +00:00
for _ , item := range n . conf . Images {
image := pagerDutyImage {
Src : tmpl ( item . Src ) ,
Alt : tmpl ( item . Alt ) ,
Href : tmpl ( item . Href ) ,
}
if image . Src != "" {
msg . Images = append ( msg . Images , image )
}
2019-06-18 13:34:46 +00:00
}
2020-09-25 15:31:22 +00:00
for _ , item := range n . conf . Links {
link := pagerDutyLink {
HRef : tmpl ( item . Href ) ,
Text : tmpl ( item . Text ) ,
}
if link . HRef != "" {
msg . Links = append ( msg . Links , link )
}
2019-06-18 13:34:46 +00:00
}
if tmplErr != nil {
2023-11-24 21:17:35 +00:00
return false , fmt . Errorf ( "failed to template PagerDuty v2 message: %w" , tmplErr )
2019-10-29 09:46:40 +00:00
}
// Ensure that the routing key isn't empty after templating.
if msg . RoutingKey == "" {
return false , errors . New ( "routing key cannot be empty" )
2019-06-18 13:34:46 +00:00
}
2020-05-15 13:15:18 +00:00
encodedMsg , err := n . encodeMessage ( msg )
if err != nil {
return false , err
2019-06-18 13:34:46 +00:00
}
2020-05-15 13:15:18 +00:00
resp , err := notify . PostJSON ( ctx , n . client , n . conf . URL . String ( ) , & encodedMsg )
2019-06-18 13:34:46 +00:00
if err != nil {
2023-11-24 21:17:35 +00:00
return true , fmt . Errorf ( "failed to post message to PagerDuty: %w" , err )
2019-06-18 13:34:46 +00:00
}
defer notify . Drain ( resp )
2023-04-07 15:05:17 +00:00
retry , err := n . retrier . Check ( resp . StatusCode , resp . Body )
if err != nil {
return retry , notify . NewErrorWithReason ( notify . GetFailureReasonFromStatusCode ( resp . StatusCode ) , err )
}
return retry , err
2019-06-18 13:34:46 +00:00
}
// Notify implements the Notifier interface.
func ( n * Notifier ) Notify ( ctx context . Context , as ... * types . Alert ) ( bool , error ) {
key , err := notify . ExtractGroupKey ( ctx )
if err != nil {
return false , err
}
var (
alerts = types . Alerts ( as ... )
data = notify . GetTemplateData ( ctx , n . tmpl , as , n . logger )
eventType = pagerDutyEventTrigger
)
if alerts . Status ( ) == model . AlertResolved {
eventType = pagerDutyEventResolve
}
2024-11-06 09:09:57 +00:00
n . logger . Debug ( "extracted group key" , "key" , key , "eventType" , eventType )
2019-06-18 13:34:46 +00:00
details := make ( map [ string ] string , len ( n . conf . Details ) )
for k , v := range n . conf . Details {
detail , err := n . tmpl . ExecuteTextString ( v , data )
if err != nil {
2023-11-24 21:17:35 +00:00
return false , fmt . Errorf ( "%q: failed to template %q: %w" , k , v , err )
2019-06-18 13:34:46 +00:00
}
details [ k ] = detail
}
if n . apiV1 != "" {
return n . notifyV1 ( ctx , eventType , key , data , details , as ... )
}
return n . notifyV2 ( ctx , eventType , key , data , details , as ... )
}
2019-08-02 14:17:40 +00:00
// errDetails extracts a human-readable explanation from a PagerDuty error
// response.
//
// Only responses with status 400 and a non-nil body are inspected. The body
// is decoded as JSON and the result is "<message>: <errors joined by ','>".
// Any other status, a nil body, or a body that fails to decode yields the
// empty string.
func errDetails(status int, body io.Reader) string {
	// See https://v2.developer.pagerduty.com/docs/trigger-events for the v1 events API.
	// See https://v2.developer.pagerduty.com/docs/send-an-event-events-api-v2 for the v2 events API.
	if status != http.StatusBadRequest || body == nil {
		return ""
	}

	var pgr struct {
		Status  string   `json:"status"`
		Message string   `json:"message"`
		Errors  []string `json:"errors"`
	}
	if err := json.NewDecoder(body).Decode(&pgr); err != nil {
		return ""
	}

	return fmt.Sprintf("%s: %s", pgr.Message, strings.Join(pgr.Errors, ","))
}