夜莺服务端部署

简介

夜莺(Nightingale)是中国计算机学会托管的开源云原生可观测工具,2020 年由滴滴孵化和开源。夜莺采用 All-in-One 的设计理念,集数据采集、可视化、监控告警于一体,与云原生生态紧密集成,融入了顶级互联网公司可观测性最佳实践,沉淀了众多社区专家经验,开箱即用。

https://n9e.github.io/

部署方式

官网最推荐的部署方式是二进制部署,不仅稳,而且升级也方便。

依赖

夜莺依赖 mysql 存储用户配置类数据,依赖 redis 存储 jwt token 和机器心跳上报的 metadata。

  • mysql:经测试,MySQL 5.7、8.0 以及 TiDB 7.4 均可正常使用;官方使用 mariadb。
  • redis:测试5.0版本可以正常使用,可以支持用户名密码。
  • prometheus:remote write远程写入,需要开启远程写入的功能,2.x之后版本支持。
# install mysql (MariaDB)
# Quote the pattern so the shell hands it to yum untouched instead of
# expanding it against files in the current directory.
yum -y install 'mariadb*'
systemctl enable mariadb
systemctl restart mariadb
# Create the application account BEFORE protecting root: right after a fresh
# install root can still log in without a password, which is what the
# passwordless `mysql -e` calls below rely on.
# The account is created for both '%' and 'localhost' because a default
# install may ship an anonymous ''@'localhost' user that would otherwise
# shadow 'n9e_user'@'%' for local connections.
mysql -e "CREATE USER 'n9e_user'@'%' IDENTIFIED BY 'xxxxxxx';"
mysql -e "CREATE USER 'n9e_user'@'localhost' IDENTIFIED BY 'xxxxxxx';"
# n9e_v6 is the database named in the DSN of config.toml; granting on a
# database that does not exist yet is allowed.
mysql -e "GRANT ALL PRIVILEGES ON n9e_v6.* TO 'n9e_user'@'%';"
mysql -e "GRANT ALL PRIVILEGES ON n9e_v6.* TO 'n9e_user'@'localhost';"
# Set the root password last; from here on root needs -p to log in.
mysql -e "SET PASSWORD FOR 'root'@'localhost' = PASSWORD('xxxxxxxxx');"

# install redis: install the package, then enable it at boot and (re)start it
yum install -y redis
for action in enable restart; do
  systemctl "$action" redis
done

# install prometheus: download the release tarball and unpack it to /opt/prometheus
cd /root/
wget -c https://github.com/prometheus/prometheus/releases/download/v2.47.2/prometheus-2.47.2.linux-amd64.tar.gz
tar zxvf prometheus-2.47.2.linux-amd64.tar.gz
mv prometheus-2.47.2.linux-amd64 /opt/prometheus

# Write the systemd unit. The delimiter is quoted so the unit text is written
# literally, with no shell expansion.
# --web.enable-remote-write-receiver turns on the /api/v1/write endpoint that
# n9e pushes to; it replaces the deprecated
# --enable-feature=remote-write-receiver feature flag.
cat > /usr/lib/systemd/system/prometheus.service << 'EOF'
[Unit]
Description=prometheus
After=network.target

[Service]
Type=simple
WorkingDirectory=/opt/prometheus
ExecStart=/opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml --web.enable-remote-write-receiver --storage.tsdb.retention.size=150GB
LimitNOFILE=65536
PrivateTmp=true
RestartSec=2
StartLimitInterval=0
Restart=always

[Install]
WantedBy=multi-user.target
EOF
# Make systemd pick up the new unit file, then enable it at boot and start it now.
systemctl daemon-reload
systemctl enable prometheus --now

下载安装

官方已经提供二进制下载链接:https://flashcat.cloud/download/nightingale/

都是 Golang 写的,所以解压就可以直接运行。

安装

# Download the n9e release and unpack it into /opt/n9e
cd /root/
wget -c https://download.flashcat.cloud/n9e-v6.4.0-linux-amd64.tar.gz
mkdir -p /opt/n9e/
tar zxvf n9e-v6.4.0-linux-amd64.tar.gz -C /opt/n9e/

# Import the schema shipped in the tarball (run as root with the root password set earlier)
cd /opt/n9e
mysql -uroot -pxxxxxxxxx < n9e.sql

# Write the systemd unit. The delimiter is quoted so the text is written literally.
# After= and Restart= mirror the prometheus unit in this guide so that n9e is
# brought back automatically if it exits.
cat > /usr/lib/systemd/system/n9e.service << 'EOF'
[Unit]
Description=Nightingale
After=network.target

[Service]
Type=simple
WorkingDirectory=/opt/n9e
ExecStart=/opt/n9e/n9e
RestartSec=2
Restart=always

[Install]
WantedBy=multi-user.target
EOF
# Make systemd pick up the new unit file; the service itself is started after
# config.toml has been edited.
systemctl daemon-reload

配置

vim /opt/n9e/etc/config.toml

修改DB、Redis、Pushgw.Writers对应的MySQL、Redis、prometheus信息。

[Global]
# run mode of the n9e process
RunMode = "release"

[Log]
# log write dir
Dir = "logs"
# log level: DEBUG INFO WARNING ERROR
Level = "INFO"
# log destination, one of: stdout, stderr, file
# NOTE(review): Dir and the rotation options below presumably only matter when Output = "file" -- confirm
Output = "stdout"
# # rotate by time
# KeepHours = 4
# # rotate by size
# RotateNum = 3
# # unit: MB
# RotateSize = 256

[HTTP]
# http listening address
# 127.0.0.1 means only local clients can connect; the nginx examples in this
# guide proxy to 127.0.0.1:17000
Host = "127.0.0.1"
# http listening port
Port = 17000
# https cert file path
CertFile = ""
# https key file path
KeyFile = ""
# whether print access log
PrintAccessLog = false
# whether enable pprof
PProf = false
# expose prometheus /metrics?
ExposeMetrics = true
# http graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# max content length: 64M
MaxContentLength = 67108864
# http server read timeout, unit: s
ReadTimeout = 20
# http server write timeout, unit: s
WriteTimeout = 40
# http server idle timeout, unit: s
IdleTimeout = 120

[HTTP.ShowCaptcha]
Enable = false 

[HTTP.APIForAgent]
Enable = true 
# NOTE(review): entries here are presumably username = "password" pairs for
# basic auth -- confirm, and replace the sample credential before deploying
[HTTP.APIForAgent.BasicAuth]
n9e = "n9e@ops"

[HTTP.APIForService]
Enable = true 
# NOTE(review): presumably username = "password" pairs as well -- confirm
[HTTP.APIForService.BasicAuth]
user001 = "xxxxxxxxxxxxxx"

[HTTP.JWTAuth]
# signing key
# NOTE(review): this is a sample value; use your own random secret in production
SigningKey = "5b94a0fd640fe2765af826acfe42d151"
# unit: min
AccessExpired = 1500
# unit: min
RefreshExpired = 10080
RedisKeyPrefix = "/jwt/"

[HTTP.ProxyAuth]
# if proxy auth enabled, jwt auth is disabled
Enable = false
# username key in http proxy header
HeaderUserNameKey = "X-User-Name"
DefaultRoles = ["Standard"]

[HTTP.RSA]
# open RSA
OpenRSA = false
# Before replacing the key file, make sure that there are no encrypted variables in the database "configs".
# It is recommended to decrypt and remove all encrypted values from the database before replacing the key file.
# This will prevent any potential issues with accessing or decrypting the variables using the new key file.
# RSA public key (auto create)
RSAPublicKeyPath = "etc/rsa/public.pem"
# RSA private key (auto create)
RSAPrivateKeyPath = "etc/rsa/private.pem"
# RSA private key password
RSAPassWord = "xxxxxxx"

[DB]
# postgres: host=%s port=%s user=%s dbname=%s password=%s sslmode=%s
# postgres: DSN="host=127.0.0.1 port=5432 user=root dbname=n9e_v6 password=1234 sslmode=disable"
# mysql: user:password@tcp(host:port)/dbname?options
# The user, password and database name here must match the account and
# database that actually exist on the MySQL server.
DSN="n9e_user:xxxxxxx@tcp(localhost:3306)/n9e_v6?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
# enable debug mode or not
Debug = false
# database type, one of: mysql postgres
DBType = "mysql"
# max connection lifetime, unit: s
MaxLifetime = 7200
# max open connections
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# table prefix
TablePrefix = ""
# enable auto migrate or not
# EnableAutoMigrate = false

[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs)
Address = "127.0.0.1:6379"
# Username = ""
# NOTE(review): the Redis install step in this guide does not configure a
# password on the server -- make sure this value agrees with the server's
# own setting (presumably empty when the server has none; confirm)
Password = "xxxxx"
# Redis logical database index
DB = 1
# UseTLS = false
# TLSMinVersion = "1.2"
# deployment type, one of: standalone cluster sentinel
RedisType = "standalone"
# Mastername for sentinel type
# MasterName = "mymaster"
# SentinelUsername = ""
# SentinelPassword = ""

[Alert]
[Alert.Heartbeat]
# auto detect if blank
IP = ""
# heartbeat interval, unit ms
Interval = 1000
# NOTE(review): presumably the name this alert engine instance reports under -- confirm
EngineName = "default"

# [Alert.Alerting]
# NotifyConcurrency = 10

[Center]
# path is relative; the systemd unit in this guide runs n9e with WorkingDirectory=/opt/n9e
MetricsYamlFile = "./etc/metrics.yaml"
# NOTE(review): presumably the HTTP header used to select the UI language -- confirm
I18NHeaderKey = "X-Language"

# NOTE(review): these flags presumably allow the named features without login -- confirm
[Center.AnonymousAccess]
PromQuerier = true
AlertDetail = true

[Pushgw]
# use target labels in database instead of in series
LabelRewrite = true
# # default busigroup key name
# BusiGroupLabelKey = "busigroup"
# ForceUseServerTS = false

# [Pushgw.DebugSample]
# ident = "xx"
# __name__ = "xx"

# [Pushgw.WriterOpt]
# QueueMaxSize = 1000000
# QueuePopSize = 1000

[[Pushgw.Writers]] 
# Url = "http://127.0.0.1:8480/insert/0/prometheus/api/v1/write"
# Remote write endpoint of the time series database.
# 1.1.1.1 is a placeholder: replace it with the address of your Prometheus,
# which must have its remote write receiver enabled.
Url = "http://1.1.1.1:9090/api/v1/write"
# Basic auth username
BasicAuthUser = ""
# Basic auth password
BasicAuthPass = ""
# NOTE(review): presumably extra HTTP headers sent with each write, listed as
# alternating name/value entries -- confirm
Headers = ["X-From", "n9e"]
# timeout settings, unit: ms
Timeout = 10000
DialTimeout = 3000
TLSHandshakeTimeout = 30000
ExpectContinueTimeout = 1000
IdleConnTimeout = 90000
# time duration, unit: ms
KeepAlive = 30000
MaxConnsPerHost = 0
MaxIdleConns = 100
MaxIdleConnsPerHost = 100
## Optional TLS Config
# UseTLS = false
# TLSCA = "/etc/n9e/ca.pem"
# TLSCert = "/etc/n9e/cert.pem"
# TLSKey = "/etc/n9e/key.pem"
# InsecureSkipVerify = false
# [[Writers.WriteRelabels]]
# Action = "replace"
# SourceLabels = ["__address__"]
# Regex = "([^:]+)(?::\\d+)?"
# Replacement = "$1:80"
# TargetLabel = "__address__"

启动验证

默认监听端口是17000

# Enable n9e at boot and start it now.
systemctl enable n9e --now
# Show only listening TCP sockets (-l) and match the port with its leading
# colon, so established connections, PIDs or other ports that merely contain
# "17000" are not reported as a false positive.
ss -lntp | grep ':17000'

nginx转发

内网访问可以只配置80

# Plain-HTTP reverse proxy in front of n9e, intended for intranet access.
server {
    # Placeholder host name (same one as the HTTPS example); replace with your own.
    server_name n9e-ops.example.com;
    listen 80;
    # Echo back whatever headers the browser asks for in a CORS preflight.
    add_header Access-Control-Allow-Headers $http_access_control_request_headers;
    access_log off;
    location / {
        # Pass the original host, client address and scheme on to n9e.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header REMOTE-HOST $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # n9e listens on 127.0.0.1:17000 (see [HTTP] in config.toml).
        proxy_pass http://127.0.0.1:17000/;
    }
}

外网访问可以强制走https

# Port 80 only redirects to HTTPS.
server {
    server_name n9e-ops.example.com;
    listen 80;
    # Permanent redirect that keeps the original path and query string;
    # `return` avoids evaluating a regex on every request.
    return 301 https://$server_name$request_uri;
}

# HTTPS reverse proxy in front of n9e, intended for access from the internet.
server {
    server_name n9e-ops.example.com;
    listen 443 ssl http2;
    # Certificate, key, DH parameter and ticket key paths are placeholders.
    ssl_certificate xxxxxxx.pem;
    ssl_certificate_key xxxxxxx.key;
    ssl_session_timeout  5m;
    # TLSv1 and TLSv1.1 are deprecated; offer TLS 1.2 and 1.3 only.
    # (TLSv1.3 takes effect only when nginx is built against OpenSSL 1.1.1+.)
    ssl_protocols   TLSv1.2 TLSv1.3;
    ssl_dhparam          dhparams.pem;
    # 3DES suites are left out: 64-bit block ciphers are considered weak.
    ssl_ciphers   EECDH+CHACHA20:EECDH+CHACHA20-draft:EECDH+AES128:RSA+AES128:EECDH+AES256:RSA+AES256:!MD5;
    ssl_session_cache          shared:SSL:50m;
    ssl_prefer_server_ciphers  on;
    ssl_session_tickets        on;
    ssl_session_ticket_key     session_ticket.key;
    # Echo back whatever headers the browser asks for in a CORS preflight.
    add_header Access-Control-Allow-Headers $http_access_control_request_headers;
    location / {
        # Pass the original host, client address and scheme on to n9e.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header REMOTE-HOST $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # n9e listens on 127.0.0.1:17000 (see [HTTP] in config.toml).
        proxy_pass http://127.0.0.1:17000/;
    }
}

n9e