From 11f14bd118bd27fff8d17e37d97328888e3afe9b Mon Sep 17 00:00:00 2001
From: Florian Sesser <florian@private.storage>
Date: Wed, 2 Feb 2022 17:53:51 +0000
Subject: [PATCH] Loki: Steps towards centralized logging

This is my latest version of this, updated to work with the packages in
NixOS 21.05.
---

Reviewer notes (text between the "---" line and the diffstat is dropped by
git am and is not part of the commit message):

* morph/lib/issuer.nix: additionally imports the new promtail module.
* morph/lib/monitoring.nix: imports server/loki.nix (replacing the
  commented-out import and its "Loki 0.3.0 ... is too old" remark) and the
  new promtail module.
* exporters/promtail.nix (new): enables services.promtail, opens TCP 9080
  on the monitoringvpn firewall interface, reads the systemd journal
  (job = "systemd-journal", __journal__systemd_unit relabelled to "unit")
  and pushes to http://monitoring:3100/loki/api/v1/push.
* server/loki.nix: opens 3100 and 9095 on the monitoringvpn firewall
  interface in place of the global allowedTCPPorts entry for 3100; sets
  lifecycler interface_names to monitoringvpn in place of address 0.0.0.0;
  replaces the boltdb-shipper index store with boltdb under
  /var/lib/loki/index; removes limits_config and the explicit
  chunk_idle_period / max_chunk_age / chunk_retain_period settings.

NOTE(review): index.period is removed while table_manager retention
(retention_period = "336h") stays enabled -- confirm that the default
table period still lets retention deletes work.
NOTE(review): promtail pushes over HTTP to port 3100; confirm that gRPC
port 9095 really needs to be reachable on the VPN interface.
NOTE(review): the final_sleep -/+ pair differs only in whitespace, and the
line breaks and indentation of this copy of the patch were restored by
hand; regenerate the patch from commit 11f14bd1 before running git am.

 morph/lib/issuer.nix                          |  1 +
 morph/lib/monitoring.nix                      |  4 +-
 .../modules/monitoring/exporters/promtail.nix | 36 ++++++++++++++++
 nixos/modules/monitoring/server/loki.nix      | 41 ++++++++-----------
 4 files changed, 57 insertions(+), 25 deletions(-)
 create mode 100644 nixos/modules/monitoring/exporters/promtail.nix

diff --git a/morph/lib/issuer.nix b/morph/lib/issuer.nix
index 69b0527c..e791ef34 100644
--- a/morph/lib/issuer.nix
+++ b/morph/lib/issuer.nix
@@ -8,6 +8,7 @@ in {
   imports = [
     ../../nixos/modules/monitoring/vpn/client.nix
     ../../nixos/modules/monitoring/exporters/node.nix
+    ../../nixos/modules/monitoring/exporters/promtail.nix
   ];
 
   options.grid.issuer = {
diff --git a/morph/lib/monitoring.nix b/morph/lib/monitoring.nix
index 84d6fa56..9eb73825 100644
--- a/morph/lib/monitoring.nix
+++ b/morph/lib/monitoring.nix
@@ -30,10 +30,10 @@ in {
     ../../nixos/modules/monitoring/vpn/server.nix
     ../../nixos/modules/monitoring/server/grafana.nix
     ../../nixos/modules/monitoring/server/prometheus.nix
+    ../../nixos/modules/monitoring/server/loki.nix
     ../../nixos/modules/monitoring/exporters/node.nix
     ../../nixos/modules/monitoring/exporters/blackbox.nix
-    # Loki 0.3.0 from Nixpkgs 19.09 is too old and does not work:
-    # ../../nixos/modules/monitoring/server/loki.nix
+    ../../nixos/modules/monitoring/exporters/promtail.nix
   ];
 
   options.grid.monitoring = {
diff --git a/nixos/modules/monitoring/exporters/promtail.nix b/nixos/modules/monitoring/exporters/promtail.nix
new file mode 100644
index 00000000..b52df5ce
--- /dev/null
+++ b/nixos/modules/monitoring/exporters/promtail.nix
@@ -0,0 +1,36 @@
+# Promtail log forwarder configuration
+#
+# Scope: Tail logs on the local system and send them to Loki
+#
+# Description: This is not strictly an "exporter" like the Prometheus
+#              exporters, but it is very similar in what it is doing -
+#              preparing local data and sending it off to a TSDB.
+
+{
+  config.services.promtail.enable = true;
+  config.networking.firewall.interfaces.monitoringvpn.allowedTCPPorts = [ 9080 ];
+  config.services.promtail.configuration = {
+    server = {
+      http_listen_port = 9080; # Using /metrics for health check
+      grpc_listen_address = "127.0.0.1"; # unused, but no option to turn it off.
+      grpc_listen_port = 9095; # unused, but no option to turn it off.
+    };
+
+    clients = [{
+      url = "http://monitoring:3100/loki/api/v1/push";
+    }];
+
+    scrape_configs = [{
+      job_name = "systemd-journal";
+      journal = {
+        labels = {
+          job = "systemd-journal";
+        };
+      };
+      relabel_configs = [{
+        source_labels = [ "__journal__systemd_unit" ];
+        target_label = "unit";
+      }];
+    }];
+  };
+}
diff --git a/nixos/modules/monitoring/server/loki.nix b/nixos/modules/monitoring/server/loki.nix
index 96554523..b137d6e3 100644
--- a/nixos/modules/monitoring/server/loki.nix
+++ b/nixos/modules/monitoring/server/loki.nix
@@ -1,9 +1,14 @@
 # Loki Server
 #
-# Scope: Log aggregator
+# Scope: Log ingester and aggregator to be run on the monitoring node
+#
+# See also:
+# - The configuration is adapted from
+#   https://grafana.com/docs/loki/latest/configuration/examples/#complete-local-configyaml
+#
 
 {
-  config.networking.firewall.allowedTCPPorts = [ 3100 ];
+  config.networking.firewall.interfaces.monitoringvpn.allowedTCPPorts = [ 3100 9095 ];
 
   config.services.loki = {
     enable = true;
@@ -14,63 +19,53 @@
 
       server = {
         http_listen_port = 3100;
+        grpc_listen_port = 9095;
       };
 
       ingester = {
         lifecycler = {
-          address = "0.0.0.0";
+          interface_names = [ "monitoringvpn"];
           ring = {
             kvstore = {
               store = "inmemory";
             };
             replication_factor = 1;
           };
-          final_sleep = "0s";
+          final_sleep = "0s";
         };
-        chunk_idle_period = "1h"; # Any chunk not receiving new logs in this time will be flushed
-        max_chunk_age = "1h"; # All chunks will be flushed when they hit this age, default is 1h
-        chunk_target_size = 1048576; # Loki will attempt to build chunks up to 1.5MB, flushing first if chunk_idle_period or max_chunk_age is reached first
-        chunk_retain_period = "30s"; # Must be greater than index read cache TTL if using an index cache (Default index read cache TTL is 5m)
+        chunk_target_size = 1536000; # As per https://grafana.com/docs/loki/v2.2.1/best-practices/
         max_transfer_retries = 0; # Chunk transfers disabled
       };
 
       schema_config = {
         configs = [{
-          from = "2020-10-24"; # TODO: Should this be "today"?
-          store = "boltdb-shipper";
+          from = "2020-12-26";
+          store = "boltdb";
           object_store = "filesystem";
           schema = "v11";
           index = {
             prefix = "index_";
-            period = "24h";
           };
         }];
       };
 
       storage_config = {
-        boltdb_shipper = {
-          active_index_directory = "/var/lib/loki/boltdb-shipper-active";
-          cache_location = "/var/lib/loki/boltdb-shipper-cache";
-          cache_ttl = "24h"; # Can be increased for faster performance over longer query periods, uses more disk space
-          shared_store = "filesystem";
+        boltdb = {
+          directory = "/var/lib/loki/index";
         };
+
         filesystem = {
           directory = "/var/lib/loki/chunks";
         };
       };
 
-      limits_config = {
-        reject_old_samples = true;
-        reject_old_samples_max_age = "168h";
-      };
-
       chunk_store_config = {
-        max_look_back_period = "336h";
+        max_look_back_period = "336h"; # two weeks
       };
 
       table_manager = {
         retention_deletes_enabled = true;
-        retention_period = "336h";
+        retention_period = "336h"; # two weeks
       };
     };
   };
-- 
GitLab