summaryrefslogtreecommitdiffstats
path: root/modules/graphite
diff options
context:
space:
mode:
authorMarek Vavrusa <marek@vavrusa.com>2016-02-24 07:40:17 +0100
committerMarek Vavrusa <marek@vavrusa.com>2016-02-24 07:40:17 +0100
commitbad082fa40a8b46c545b42dfcc5218ec73776a86 (patch)
tree2e2458c61d02697e591b13ede208ab82383cb9a7 /modules/graphite
parentFix 'bogus proof of DS non-existence' for non-existent DS records in the cache (diff)
downloadknot-resolver-bad082fa40a8b46c545b42dfcc5218ec73776a86.tar.xz
knot-resolver-bad082fa40a8b46c545b42dfcc5218ec73776a86.zip
modules/graphite: support for Graphite/TCP
The graphite module now supports sending over TCP; if the connection is severed, it will attempt to reconnect periodically. The stats module is now optional; if it is not loaded, only the core built-in stats will be transmitted.
Diffstat (limited to 'modules/graphite')
-rw-r--r--modules/graphite/README.rst5
-rw-r--r--modules/graphite/graphite.lua114
2 files changed, 84 insertions, 35 deletions
diff --git a/modules/graphite/README.rst b/modules/graphite/README.rst
index 5145a00e..f324142c 100644
--- a/modules/graphite/README.rst
+++ b/modules/graphite/README.rst
@@ -12,7 +12,7 @@ Example configuration
Only the ``host`` parameter is mandatory.
-.. warning:: It uses UDP so it doesn't guarantee the delivery, make sure the target server supports UDP.
+.. note:: By default the module uses UDP, so it doesn't guarantee delivery; set ``tcp = true`` to enable Graphite over TCP. If the TCP consumer goes down or the connection with Graphite is lost, the resolver will periodically attempt to reconnect to it.
.. code-block:: lua
@@ -21,7 +21,8 @@ Only the ``host`` parameter is mandatory.
prefix = hostname(), -- optional metric prefix
host = '127.0.0.1', -- graphite server address
port = 2003, -- graphite server port
- interval = 5 * sec -- publish interval
+ interval = 5 * sec, -- publish interval
+ tcp = false -- set to true if you want TCP mode
}
}
diff --git a/modules/graphite/graphite.lua b/modules/graphite/graphite.lua
index f63c4bf3..c0435765 100644
--- a/modules/graphite/graphite.lua
+++ b/modules/graphite/graphite.lua
@@ -1,11 +1,73 @@
--- @module graphite
local graphite = {}
+local socket = require('socket')
+
+-- Create connected UDP socket
+local function make_udp(host, port)
+ local s, err, status
+ if host:find(':') then
+ s, err = socket.udp6()
+ else
+ s, err = socket.udp()
+ end
+ if not s then
+ return nil, err
+ end
+ status, err = s:setpeername(host, port)
+ if not status then
+ return nil, err
+ end
+ return s
+end
+
+-- Create connected TCP socket
+local function make_tcp(host, port)
+ local s, err, status
+ if host:find(':') then
+ s, err = socket.tcp6()
+ else
+ s, err = socket.tcp()
+ end
+ if not s then
+ return nil, err
+ end
+ status, err = s:connect(host, port)
+ if not status then
+ return s, err
+ end
+ return s
+end
+
+-- Send the metrics in a table to multiple Graphite consumers
+local function publish_table(metrics, prefix, now)
+ for key,val in pairs(metrics) do
+ local msg = key..' '..val..' '..now..'\n'
+ if prefix then
+ msg = prefix..'.'..msg
+ end
+ for i in ipairs(graphite.cli) do
+ local ok, err = graphite.cli[i]:send(msg)
+ if not ok then
+ -- Best-effort reconnect once per two tries
+ local tcp = graphite.cli[i]['connect'] ~= nil
+ local host = graphite.info[i]
+ if tcp and host.seen + 2 * graphite.interval / 1000 <= now then
+ print(string.format('[graphite] reconnecting: %s#%d reason: %s',
+ host.addr, host.port, err))
+ graphite.cli[i] = make_tcp(host.addr, host.port)
+ host.seen = now
+ end
+ end
+ end
+ end
+end
function graphite.init(module)
- graphite.socket = require('socket')
graphite.ev = nil
graphite.cli = {}
- graphite.prefix = nil
+ graphite.info = {}
+ graphite.interval = 5 * sec
+ graphite.prefix = 'kresd.' .. hostname()
return 0
end
@@ -17,44 +79,30 @@ end
-- @function Publish results to the Graphite server(s)
function graphite.publish()
local now = os.time()
+ -- Publish built-in statistics
if not graphite.cli then error("no graphite server configured") end
+ publish_table(cache.stats(), graphite.prefix..'.cache', now)
+ publish_table(worker.stats(), graphite.prefix..'.worker', now)
+ -- Publish extended statistics if available
+ if not stats then
+ return 0
+ end
local now_metrics = stats.list()
if type(now_metrics) ~= 'table' then
return 0 -- No metrics to watch
end
- local function publish_table(metrics, prefix)
- for key,val in pairs(metrics) do
- local msg = key..' '..val..' '..now..'\n'
- if prefix then
- msg = prefix..'.'..msg
- end
- for i in ipairs(graphite.cli) do
- graphite.cli[i]:send(msg)
- end
- end
- end
- publish_table(now_metrics, graphite.prefix)
- publish_table(cache.stats(), graphite.prefix..'.cache')
- publish_table(worker.stats(), graphite.prefix..'.worker')
+ publish_table(now_metrics, graphite.prefix, now)
return 0
end
-- @function Make connection to Graphite server.
-function graphite.add_server(graphite, host, port)
- local cli, err, status
- if host:find(':') then
- cli, err = graphite.socket.udp6()
- else
- cli, err = graphite.socket.udp()
- end
- if not cli then
- error(err)
- end
- status, err = cli:setpeername(host, port)
- if not status then
+function graphite.add_server(graphite, host, port, tcp)
+ local s, err = tcp and make_tcp(host, port) or make_udp(host, port)
+ if not s then
error(err)
end
- table.insert(graphite.cli, cli)
+ table.insert(graphite.cli, s)
+ table.insert(graphite.info, {addr = host, port = port, seen = 0})
return 0
end
@@ -62,19 +110,19 @@ function graphite.config(conf)
-- config defaults
if not conf then return 0 end
if not conf.port then conf.port = 2003 end
- if not conf.interval then conf.interval = 5 * sec end
+ if conf.interval then graphite.interval = conf.interval end
if conf.prefix then graphite.prefix = conf.prefix end
-- connect to host(s)
if type(conf.host) == 'table' then
for key, val in pairs(conf.host) do
- graphite:add_server(val, conf.port)
+ graphite:add_server(val, conf.port, conf.tcp)
end
else
- graphite:add_server(conf.host, conf.port)
+ graphite:add_server(conf.host, conf.port, conf.tcp)
end
-- start publishing stats
if graphite.ev then event.cancel(graphite.ev) end
- graphite.ev = event.recurrent(conf.interval, graphite.publish)
+ graphite.ev = event.recurrent(graphite.interval, graphite.publish)
return 0
end