diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 1bfea4041..16ae0520d 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -696,7 +696,13 @@ func main() { return errors.Wrapf(err, "opening storage failed") } - level.Info(logger).Log("fs_type", prom_runtime.Statfs(cfg.localStoragePath)) + switch fsType := prom_runtime.Statfs(cfg.localStoragePath); fsType { + case "NFS_SUPER_MAGIC": + level.Warn(logger).Log("fs_type", fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.") + default: + level.Info(logger).Log("fs_type", fsType) + } + level.Info(logger).Log("msg", "TSDB started") level.Debug(logger).Log("msg", "TSDB options", "MinBlockDuration", cfg.tsdb.MinBlockDuration, diff --git a/docs/storage.md b/docs/storage.md index f599dc849..a7a50209f 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -73,7 +73,9 @@ needed_disk_space = retention_time_seconds * ingested_samples_per_second * bytes To tune the rate of ingested samples per second, you can either reduce the number of time series you scrape (fewer targets or fewer series per target), or you can increase the scrape interval. However, reducing the number of series is likely more effective, due to compression of samples within a series. -If your local storage becomes corrupted for whatever reason, your best bet is to shut down Prometheus and remove the entire storage directory. Non POSIX compliant filesystems are not supported by Prometheus's local storage, corruptions may happen, without possibility to recover. NFS is only potentially POSIX, most implementations are not. You can try removing individual block directories to resolve the problem, this means losing a time window of around two hours worth of data per block directory. Again, Prometheus's local storage is not meant as durable long-term storage. +If your local storage becomes corrupted for whatever reason, your best bet is to shut down Prometheus and remove the entire storage directory. You can try removing individual block directories, or WAL directory, to resolve the problem, this means losing a time window of around two hours worth of data per block directory. Again, Prometheus's local storage is not meant as durable long-term storage. + +CAUTION: Non-POSIX compliant filesystems are not supported by Prometheus' local storage as unrecoverable corruptions may happen. NFS filesystems (including AWS's EFS) are not supported. NFS could be POSIX-compliant, but most implementations are not. It is strongly recommended to use a local filesystem for reliability. If both time and size retention policies are specified, whichever policy triggers first will be used at that instant.