From c5c0421caf278268db7222a850a4d7e76ef4f0cc Mon Sep 17 00:00:00 2001 From: Andrew McMillan Date: Wed, 22 Jun 2016 23:53:22 +0100 Subject: [PATCH] Add /metrics.php to be scraped by Prometheus for monitoring. --- config/example-config.php | 23 ++++++ dba/appuser_permissions.txt | 20 +++++ dba/davical.sql | 23 +++++- dba/patches/1.3.2.sql | 32 ++++++++ htdocs/metrics.php | 147 ++++++++++++++++++++++++++++++++++++ inc/CalDAVRequest.php | 99 ++++++++++++++++++++++-- 6 files changed, 336 insertions(+), 8 deletions(-) create mode 100644 dba/patches/1.3.2.sql create mode 100644 htdocs/metrics.php diff --git a/config/example-config.php b/config/example-config.php index 4bc07b9b..6028b5e8 100644 --- a/config/example-config.php +++ b/config/example-config.php @@ -438,3 +438,26 @@ $c->admin_email ='calendar-admin@example.com'; // // 'debug_jid' => 'otheruser@example.com' // send a copy of all publishes to this jid // ); // include ( 'pubsub.php' ); + + +/*************************************************************************** +* * +* Detailed Metrics * +* * +***************************************************************************/ + +/* +* This enables a /metrics.php URL containing detailed metrics about the +* operation of DAViCal. Ideally you will be running memcache if you are +* interested in keeping metrics, but there is a simple metrics collection +* available to you without running memcache. +* +* Note that there is currently no way of enabling metrics via memcache +* without memcache being enabled for all of DAViCal. +*/ +// $c->metrics_style = 'counters'; // Just the simple counter-based metrics +// $c->metrics_style = 'memcache'; // Only the metrics using memcache +// $c->metrics_style = 'both'; // Both styles of metrics +// $c->metrics_collectors = array('127.0.0.1'); // Restrict access to only this IP address +// $c->metrics_require_user = 'metricsuser'; // Restrict access to only connections authenticating as this user + diff --git a/dba/appuser_permissions.txt b/dba/appuser_permissions.txt index a7eba5be..bcd5b90f 100644 --- a/dba/appuser_permissions.txt +++ b/dba/appuser_permissions.txt @@ -58,6 +58,26 @@ GRANT SELECT,UPDATE ON principal_type_principal_type_id_seq ON sync_tokens_sync_token_seq ON timezones_our_tzno_seq + ON metrics_count_acl + ON metrics_count_bind + ON metrics_count_delete + ON metrics_count_delticket + ON metrics_count_get + ON metrics_count_head + ON metrics_count_lock + ON metrics_count_mkcalendar + ON metrics_count_mkcol + ON metrics_count_mkticket + ON metrics_count_move + ON metrics_count_options + ON metrics_count_post + ON metrics_count_propfind + ON metrics_count_proppatch + ON metrics_count_put + ON metrics_count_report + ON metrics_count_unknown + ON metrics_count_unlock + GRANT SELECT ON supported_locales diff --git a/dba/davical.sql b/dba/davical.sql index a06a5646..50fb4907 100644 --- a/dba/davical.sql +++ b/dba/davical.sql @@ -468,4 +468,25 @@ $$ LANGUAGE plpgsql ; ALTER TABLE dav_binding ADD CONSTRAINT "dav_name_does_not_exist" CHECK (NOT real_path_exists(dav_name)); -SELECT new_db_revision(1,3,1, 'Styczeń' ); +-- We create a bunch of counters for reporting basic statistics +CREATE SEQUENCE metrics_count_get; +CREATE SEQUENCE metrics_count_put; +CREATE SEQUENCE metrics_count_propfind; +CREATE SEQUENCE metrics_count_proppatch; +CREATE SEQUENCE metrics_count_report; +CREATE SEQUENCE metrics_count_head; +CREATE SEQUENCE metrics_count_options; +CREATE SEQUENCE metrics_count_post; +CREATE SEQUENCE metrics_count_mkcalendar; +CREATE SEQUENCE metrics_count_mkcol; +CREATE SEQUENCE metrics_count_delete; +CREATE SEQUENCE metrics_count_move; +CREATE SEQUENCE metrics_count_acl; +CREATE SEQUENCE metrics_count_lock; +CREATE SEQUENCE metrics_count_unlock; +CREATE SEQUENCE metrics_count_mkticket; +CREATE SEQUENCE metrics_count_delticket; +CREATE SEQUENCE metrics_count_bind; +CREATE SEQUENCE metrics_count_unknown; + +SELECT new_db_revision(1,3,2, 'Luty' ); diff --git a/dba/patches/1.3.2.sql b/dba/patches/1.3.2.sql new file mode 100644 index 00000000..db88ccd5 --- /dev/null +++ b/dba/patches/1.3.2.sql @@ -0,0 +1,32 @@ + +-- Notable enhancement: Sequence counters for reporting metrics for monitoring. + +BEGIN; +SELECT check_db_revision(1,3,1); + +CREATE SEQUENCE metrics_count_get; +CREATE SEQUENCE metrics_count_put; +CREATE SEQUENCE metrics_count_propfind; +CREATE SEQUENCE metrics_count_proppatch; +CREATE SEQUENCE metrics_count_report; +CREATE SEQUENCE metrics_count_head; +CREATE SEQUENCE metrics_count_options; +CREATE SEQUENCE metrics_count_post; +CREATE SEQUENCE metrics_count_mkcalendar; +CREATE SEQUENCE metrics_count_mkcol; +CREATE SEQUENCE metrics_count_delete; +CREATE SEQUENCE metrics_count_move; +CREATE SEQUENCE metrics_count_acl; +CREATE SEQUENCE metrics_count_lock; +CREATE SEQUENCE metrics_count_unlock; +CREATE SEQUENCE metrics_count_mkticket; +CREATE SEQUENCE metrics_count_delticket; +CREATE SEQUENCE metrics_count_bind; +CREATE SEQUENCE metrics_count_unknown; + +-- A new year! http://blogs.transparent.com/polish/names-of-the-months-and-their-meaning/ +SELECT new_db_revision(1,3,2, 'Luty' ); + +COMMIT; +ROLLBACK; + diff --git a/htdocs/metrics.php b/htdocs/metrics.php new file mode 100644 index 00000000..170ee66d --- /dev/null +++ b/htdocs/metrics.php @@ -0,0 +1,147 @@ + + * @copyright Andrew McMillan + * @license http://gnu.org/copyleft/gpl.html GNU GPL v2 or later + */ + +header("Content-type: text/plain; version=0.4.0"); + +require_once('./always.php'); + +// If necessary, validate they are coming from an allowed address +if ( isset($c->metrics_collectors) && !in_array($_SERVER['REMOTE_ADDR'], $c->metrics_collectors) ) { + echo "Nope."; + exit(0); +} + +// If necessary, validate that they are coming in as an authorized user +if ( isset($c->metrics_require_user) ) { + require_once('HTTPAuthSession.php'); + $session = new HTTPAuthSession(); + if ( $session->username != $c->metrics_require_user ) { + $session->AuthFailedResponse(); + echo "Nope."; + exit(0); + } +} + +// Validate that the metrics are actually turned on! +if ( !isset($c->metrics_style) || $c->metrics_style === false ) { + echo "Metrics are not enabled."; + exit(0); +} + +// Helper function to ensure we get the metric format consistent +function print_metric( $name, $qualifiers, $value ) { + print $name; + if ( !empty($qualifiers) ) { + print '{'; + $continuation = ''; + foreach( $qualifiers AS $k => $v ) { + if ( $continuation == '' ) { + $continuation = ','; + } else { + print $continuation; + } + printf( '%s="%s"', $k, $v); + } + print '}'; + } + echo ' ', $value, "\n"; +} + + +// If they want 'both' or 'all' or something then that's what they will get +// If they don't want counters, they must want to use memcache! +if ( $c->metrics_style != 'counters' ) { + // These are the preferred metrics, which include some internal details + // of the request processing. + include_once('AwlCache.php'); + $cache = getCacheInstance(); + + $index = unserialize($cache->get('metrics', 'index')); + print "# HELP caldav_request_status The DAViCal requests broken down by HTTP method and response status\n"; + print "# TYPE caldav_request_status counter\n"; + foreach( $index['methods'] AS $method => $ignored ) { + foreach( $index['statuses'] AS $status => $ignored ) { + $count = $cache->get('metrics', $method.':'.$status ); + if ( $count !== false ) { + print_metric("caldav_request_status", array('method'=>$method, 'status'=>$status), $count); + } + } + } + + print "\n"; + print "# HELP caldav_response_bytes The DAViCal response size by HTTP method\n"; + print "# TYPE caldav_response_bytes counter\n"; + foreach( $index['methods'] AS $method => $ignored ) { + $count = $cache->get('metrics', $method.':size'); + print_metric("caldav_request_bytes", array('method'=>$method), $count); + } + + $timings = array('script', 'query', 'flush'); + print "\n"; + print "# HELP caldav_request_microseconds The DAViCal response time taken in general, in queries, and in flushing buffers\n"; + print "# TYPE caldav_request_microseconds counter\n"; + foreach( $index['methods'] AS $method => $ignored ) { + foreach( $timings AS $timing ) { + $count = $cache->get('metrics', $method.':'.$timing.'_time'); + print_metric("caldav_request_microseconds", array('method'=>$method, 'timing'=>$timing), $count); + } + } +} + +// If they don't want memcache, they must want to use counters! +if ( $c->metrics_style != 'memcache' ) { + // These are more basic metrics. Just counts of requests, by type. + $sql = <<Exec("metrics", __LINE__ , __FILE__); + $row = (array) $qry->Fetch(); + print "\n"; + print "# HELP caldav_request_count The DAViCal requests broken down by HTTP method (get, put, propfind, etc.).\n"; + print "# TYPE caldav_request_count counter\n"; + foreach ($row as $k => $v) { + print_metric("caldav_request_count", array( "method" => str_replace("_count", "", $k)), $v); + } +} + +print "\n"; +print "# HELP davical_up Are the servers up.\n"; +print "# TYPE davical_up gauge\n"; +print_metric("davical_up", array('server'=>$c->sysabbr), 1); + +if ( function_exists('memory_get_usage') ) { + print "\n"; + print "# HELP davical_process_memory How much memory is this process using.\n"; + print "# TYPE davical_process_memory gauge\n"; + print_metric("davical_process_memory", array("pid" => getmypid(), 'type' => 'curr'), memory_get_usage()); + print_metric("davical_process_memory", array("pid" => getmypid(), 'type' => 'peak'), memory_get_peak_usage()); +} diff --git a/inc/CalDAVRequest.php b/inc/CalDAVRequest.php index 864eafb6..cf84e74a 100644 --- a/inc/CalDAVRequest.php +++ b/inc/CalDAVRequest.php @@ -269,7 +269,7 @@ class CalDAVRequest } if ( !is_int($this->depth) && "infinity" == $this->depth ) $this->depth = DEPTH_INFINITY; $this->depth = intval($this->depth); - + /** * MOVE/COPY use a "Destination" header and (optionally) an "Overwrite" one. */ @@ -1301,20 +1301,22 @@ EOSQL; } } + $script_finish = microtime(true); + $script_time = $script_finish - $c->script_start_time; + $message_length = strlen($message); if ( $message != '' ) { - if ( !headers_sent() ) header( "Content-Length: ".strlen($message) ); + if ( !headers_sent() ) header( "Content-Length: ".$message_length ); echo $message; } if ( isset($c->dbg['caldav']) && $c->dbg['caldav'] ) { - if ( strlen($message) > 100 || strstr($message, "\n") ) { - $message = substr( preg_replace("#\s+#m", ' ', $message ), 0, 100) . (strlen($message) > 100 ? "..." : ""); + if ( $message_length > 100 || strstr($message, "\n") ) { + $message = substr( preg_replace("#\s+#m", ' ', $message ), 0, 100) . ($message_length > 100 ? "..." : ""); } dbg_error_log("caldav", "Status: %d, Message: %s, User: %d, Path: %s", $status, $message, $session->principal->user_no(), $this->path); } if ( isset($c->dbg['statistics']) && $c->dbg['statistics'] ) { - $script_time = microtime(true) - $c->script_start_time; $memory = ''; if ( function_exists('memory_get_usage') ) { $memory = sprintf( ', Memory: %dk, Peak: %dk', memory_get_usage()/1024, memory_get_peak_usage(true)/1024); @@ -1327,14 +1329,97 @@ EOSQL; } catch( Exception $ignored ) {} + if ( isset($c->metrics_style) && $c->metrics_style !== false ) { + $flush_time = microtime(true) - $script_finish; + $this->DoMetrics($status, $message_length, $script_time, $flush_time); + } + if ( isset($c->exit_after_memory_exceeds) && function_exists('memory_get_peak_usage') && memory_get_peak_usage(true) > $c->exit_after_memory_exceeds ) { // 64M @dbg_error_log("statistics", "Peak memory use exceeds %d bytes (%d) - killing process %d", $c->exit_after_memory_exceeds, memory_get_peak_usage(true), getmypid()); register_shutdown_function( 'CalDAVRequest::kill_on_exit' ); } - - + exit(0); } + + /** + * Record the metrics related to this request. + * + * @param status The HTTP status code for this response + * @param response_size The size of the response (bytes). + * @param script_time The time taken to generate the response (pre-sending) + * @param flush_time The time taken to send the response (buffers flushed) + */ + function DoMetrics($status, $response_size, $script_time, $flush_time) { + global $c; + static $ns = 'metrics'; + + $method = (empty($this->method) ? 'UNKNOWN' : $this->method); + + // If they want 'both' or 'all' or something then that's what they will get + // If they don't want counters, they must want to use memcache! + if ( $c->metrics_style != 'counters' ) { + $cache = getCacheInstance(); + if ( $cache->isActive() ) { + + $base_key = $method.':'; + $count_like_this = $cache->increment( $ns, $base_key.$status ); + $cache->increment( $ns, $base_key.'size', $response_size ); + $cache->increment( $ns, $base_key.'script_time', intval($script_time * 1000000) ); + $cache->increment( $ns, $base_key.'flush_time', intval($flush_time * 1000000) ); + $cache->increment( $ns, $base_key.'query_time', intval($c->total_query_time * 1000000) ); + + if ( $count_like_this == 1 ) { + // We need to maintain a set of details regarding the methods and statuses we have + // encountered, so we know what to retrieve. Since this is the first one like + // this, we add it to the index. + try { + $index = unserialize($cache->get($ns, 'index')); + } catch (Exception $e) { + $index = array('methods' => array(), 'statuses' => array()); + } + $index['methods'][$method] = 1; + $index['statuses'][$status] = 1; + $cache->set($ns, 'index', serialize($index), 0); + } + } + else { + error_log("Full statistics are only available with a working Memcache configuration"); + } + } + + // If they don't want memcache, they must want to use counters! + if ( $c->metrics_style != 'memcache' ) { + $qstring = "SELECT nextval('%s')"; + switch( $method ) { + case 'OPTIONS': + case 'REPORT': + case 'PROPFIND': + case 'GET': + case 'PUT': + case 'HEAD': + case 'PROPPATCH': + case 'POST': + case 'MKCALENDAR': + case 'MKCOL': + case 'DELETE': + case 'MOVE': + case 'ACL': + case 'LOCK': + case 'UNLOCK': + case 'MKTICKET': + case 'DELTICKET': + case 'BIND': + $counter = strtolower($this->method); + break; + default: + $counter = 'unknown'; + break; + } + $qry = new AwlQuery( "SELECT nextval('metrics_count_" . $counter . "')" ); + $qry->Exec('always',__LINE__,__FILE__); + } + } }