diff options
author | Graham Leggett <minfrin@apache.org> | 2009-08-27 00:46:35 +0200 |
---|---|---|
committer | Graham Leggett <minfrin@apache.org> | 2009-08-27 00:46:35 +0200 |
commit | 2c379cc6fb1622a808e3b56405fc8d1eefc982f2 (patch) | |
tree | a90605ef9213493efcf60fe01e01c8d3615a08e3 | |
parent | Fix shm, rmm, and lock problems. (diff) | |
download | apache2-2c379cc6fb1622a808e3b56405fc8d1eefc982f2.tar.xz apache2-2c379cc6fb1622a808e3b56405fc8d1eefc982f2.zip |
mod_cache: Introduce the thundering herd lock, a mechanism to keep
the flood of requests at bay that strike a backend webserver as
a cached entity goes stale.
git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@808212 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- | CHANGES | 4 | ||||
-rw-r--r-- | docs/manual/mod/mod_cache.xml | 126 | ||||
-rw-r--r-- | modules/cache/cache_storage.c | 2 | ||||
-rw-r--r-- | modules/cache/cache_util.c | 231 | ||||
-rw-r--r-- | modules/cache/mod_cache.c | 233 | ||||
-rw-r--r-- | modules/cache/mod_cache.h | 88 |
6 files changed, 617 insertions, 67 deletions
@@ -2,6 +2,10 @@ Changes with Apache 2.3.3 + *) mod_cache: Introduce the thundering herd lock, a mechanism to keep + the flood of requests at bay that strike a backend webserver as + a cached entity goes stale. [Graham Leggett] + *) mod_auth_digest: Fix usage of shared memory and re-enable it. PR 16057 [Dan Poirier] diff --git a/docs/manual/mod/mod_cache.xml b/docs/manual/mod/mod_cache.xml index e5c360aca5..004af8468d 100644 --- a/docs/manual/mod/mod_cache.xml +++ b/docs/manual/mod/mod_cache.xml @@ -96,6 +96,70 @@ </example> </section> +<section id="thunderingherd"><title>Avoiding the Thundering Herd</title> + <p>When a cached entry becomes stale, <module>mod_cache</module> will submit + a conditional request to the backend, which is expected to confirm whether the + cached entry is still fresh, and send an updated entity if not.</p> + <p>A small but finite amount of time exists between the time the cached entity + becomes stale, and the time the stale entity is fully refreshed. On a busy + server, a significant number of requests might arrive during this time, and + cause a <strong>thundering herd</strong> of requests to strike the backend + suddenly and unpredictably.</p> + <p>To keep the thundering herd at bay, the <directive>CacheLock</directive> + directive can be used to define a directory in which locks are created for + URLs <strong>in flight</strong>. The lock is used as a <strong>hint</strong> + by other requests to either suppress an attempt to cache (someone else has + gone to fetch the entity), or to indicate that a stale entry is being refreshed + (stale content will be returned in the mean time). + </p> + <section> + <title>Initial caching of an entry</title> + <p>When an entity is cached for the first time, a lock will be created for the + entity until the response has been fully cached. During the lifetime of the + lock, the cache will suppress the second and subsequent attempt to cache the + same entity. 
While this doesn't hold back the thundering herd, it does stop + the cache attempting to cache the same entity multiple times simultaneously. + </p> + </section> + <section> + <title>Refreshment of a stale entry</title> + <p>When an entity reaches its freshness lifetime and becomes stale, a lock + will be created for the entity until the response has either been confirmed as + still fresh, or replaced by the backend. During the lifetime of the lock, the + second and subsequent incoming request will cause stale data to be returned, + and the thundering herd is kept at bay.</p> + </section> + <section> + <title>Locks and Cache-Control: no-cache</title> + <p>Locks are used as a <strong>hint only</strong> to enable the cache to be + more gentle on backend servers, however the lock can be overridden if necessary. + If the client sends a request with a Cache-Control header forcing a reload, any + lock that may be present will be ignored, and the client's request will be + honoured immediately and the cached entry refreshed.</p> + <p>As a further safety mechanism, locks have a configurable maximum age. + Once this age has been reached, the lock is removed, and a new request is + given the opportunity to create a new lock. This maximum age can be set using + the <directive>CacheLockMaxAge</directive> directive, and defaults to 5 + seconds. 
+ </p> + </section> + <section> + <title>Example configuration</title> + <example><title>Enabling the cache lock</title> + #<br /> + # Enable the cache lock<br /> + #<br /> + <IfModule mod_cache.c><br /> + <indent> + CacheLock on<br /> + CacheLockPath /tmp/mod_cache-lock<br /> + CacheLockMaxAge 5<br /> + </indent> + </IfModule> + </example> + </section> +</section> + <directivesynopsis> <name>CacheEnable</name> <description>Enable caching of specified URLs using a specified storage @@ -493,4 +557,66 @@ LastModified date.</description> <seealso><directive module="mod_cache">CacheIgnoreCacheControl</directive></seealso> <seealso><directive module="mod_cache">CacheStorePrivate</directive></seealso> </directivesynopsis> + +<directivesynopsis> +<name>CacheLock</name> +<description>Enable the thundering herd lock.</description> +<syntax>CacheLock <var>on|off</var></syntax> +<default>CacheLock off</default> +<contextlist><context>server config</context><context>virtual host</context> +</contextlist> + +<usage> + <p>The <directive>CacheLock</directive> directive enables the thundering herd lock + for the given URL space.</p> + + <p>In a minimal configuration the following directive is all that is needed to + enable the thundering herd lock in the default system temp directory.</p> + + <example> + # Enable cache lock<br /> + CacheLock on<br /><br /> + </example> + +</usage> +</directivesynopsis> + +<directivesynopsis> +<name>CacheLockPath</name> +<description>Set the lock path directory.</description> +<syntax>CacheLockPath <var>directory</var></syntax> +<default>CacheLockPath /tmp/mod_cache-lock</default> +<contextlist><context>server config</context><context>virtual host</context> +</contextlist> + +<usage> + <p>The <directive>CacheLockPath</directive> directive allows you to specify the + directory in which the locks are created. By default, the system's temporary + folder is used. 
Locks consist of empty files that only exist for stale URLs + in flight, so is significantly less resource intensive than the traditional + disk cache.</p> + +</usage> +</directivesynopsis> + +<directivesynopsis> +<name>CacheLockMaxAge</name> +<description>Set the maximum possible age of a cache lock.</description> +<syntax>CacheLockMaxAge <var>integer</var></syntax> +<default>CacheLockMaxAge 5</default> +<contextlist><context>server config</context><context>virtual host</context> +</contextlist> + +<usage> + <p>The <directive>CacheLockMaxAge</directive> directive specifies the maximum + age of any cache lock.</p> + + <p>A lock older than this value in seconds will be ignored, and the next + incoming request will be given the opportunity to re-establish the lock. + This mechanism prevents a slow client taking an excessively long time to refresh + an entity.</p> + +</usage> +</directivesynopsis> + </modulesynopsis> diff --git a/modules/cache/cache_storage.c b/modules/cache/cache_storage.c index 08411f9cb1..c122bddbe0 100644 --- a/modules/cache/cache_storage.c +++ b/modules/cache/cache_storage.c @@ -35,7 +35,7 @@ int cache_remove_url(cache_request_rec *cache, apr_pool_t *p) /* Remove the stale cache entry if present. If not, we're * being called from outside of a request; remove the - * non-stalle handle. + * non-stale handle. */ h = cache->stale_handle ? 
cache->stale_handle : cache->handle; if (!h) { diff --git a/modules/cache/cache_util.c b/modules/cache/cache_util.c index bc8dbd8f51..3b70af29ae 100644 --- a/modules/cache/cache_util.c +++ b/modules/cache/cache_util.c @@ -20,6 +20,8 @@ /* -------------------------------------------------------------- */ +extern APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key; + extern module AP_MODULE_DECLARE_DATA cache_module; /* Determine if "url" matches the hostname, scheme and port and path @@ -162,9 +164,175 @@ CACHE_DECLARE(apr_int64_t) ap_cache_current_age(cache_info *info, return apr_time_sec(current_age); } +/** + * Try obtain a cache wide lock on the given cache key. + * + * If we return APR_SUCCESS, we obtained the lock, and we are clear to + * proceed to the backend. If we return APR_EEXIST, the lock is + * already locked, someone else has gone to refresh the backend data + * already, so we must return stale data with a warning in the mean + * time. If we return anything else, then something has gone pear + * shaped, and we allow the request through to the backend regardless. + * + * This lock is created from the request pool, meaning that should + * something go wrong and the lock isn't deleted on return of the + * request headers from the backend for whatever reason, at worst the + * lock will be cleaned up when the request dies or finishes. + * + * If something goes truly bananas and the lock isn't deleted when the + * request dies, the lock will be trashed when its max-age is reached, + * or when a request arrives containing a Cache-Control: no-cache. At + * no point is it possible for this lock to permanently deny access to + * the backend. 
+ */ +CACHE_DECLARE(apr_status_t) ap_cache_try_lock(cache_server_conf *conf, + request_rec *r, char *key) { + apr_status_t status; + const char *lockname; + const char *path; + char dir[5]; + apr_time_t now = apr_time_now(); + apr_finfo_t finfo; + finfo.mtime = 0; + apr_file_t *lockfile; + + if (!conf || !conf->lock || !conf->lockpath) { + /* no locks configured, leave */ + return APR_SUCCESS; + } + + /* create the key if it doesn't exist */ + if (!key) { + cache_generate_key(r, r->pool, &key); + } + + /* create a hashed filename from the key, and save it for later */ + lockname = ap_cache_generate_name(r->pool, 0, 0, key); + + /* lock files represent discrete just-went-stale URLs "in flight", so + * we support a simple two level directory structure, more is overkill. + */ + dir[0] = '/'; + dir[1] = lockname[0]; + dir[2] = '/'; + dir[3] = lockname[1]; + dir[4] = 0; + + /* make the directories */ + path = apr_pstrcat(r->pool, conf->lockpath, dir, NULL); + if (APR_SUCCESS != (status = apr_dir_make_recursive(path, + APR_UREAD|APR_UWRITE|APR_UEXECUTE, r->pool))) { + ap_log_error(APLOG_MARK, APLOG_ERR, status, r->server, + "Could not create a cache lock directory: %s", + path); + return status; + } + lockname = apr_pstrcat(r->pool, path, "/", lockname, NULL); + apr_pool_userdata_set(lockname, CACHE_LOCKNAME_KEY, NULL, r->pool); + + /* is an existing lock file too old? 
*/ + status = apr_stat(&finfo, lockname, + APR_FINFO_MTIME | APR_FINFO_NLINK, r->pool); + if (!APR_STATUS_IS_ENOENT(status) && APR_SUCCESS != status) { + ap_log_error(APLOG_MARK, APLOG_ERR, APR_EEXIST, r->server, + "Could not stat a cache lock file: %s", + lockname); + return status; + } + if (APR_SUCCESS == status && ((now - finfo.mtime) > conf->lockmaxage) + || (now < finfo.mtime)) { + ap_log_error(APLOG_MARK, APLOG_INFO, status, r->server, + "Cache lock file for '%s' too old, removing: %s", + r->uri, lockname); + apr_file_remove(lockname, r->pool); + } + + /* try obtain a lock on the file */ + if (APR_SUCCESS == (status = apr_file_open(&lockfile, lockname, + APR_WRITE | APR_CREATE | APR_EXCL | APR_DELONCLOSE, + APR_UREAD | APR_UWRITE, r->pool))) { + apr_pool_userdata_set(lockfile, CACHE_LOCKFILE_KEY, NULL, r->pool); + } + return status; + +} + +/** + * Remove the cache lock, if present. + * + * First, try to close the file handle, whose delete-on-close should + * kill the file. Otherwise, just delete the file by name. + * + * If no lock name has yet been calculated, do the calculation of the + * lock name first before trying to delete the file. + * + * If an optional bucket brigade is passed, the lock will only be + * removed if the bucket brigade contains an EOS bucket. + */ +CACHE_DECLARE(apr_status_t) ap_cache_remove_lock(cache_server_conf *conf, + request_rec *r, char *key, apr_bucket_brigade *bb) { + void *dummy; + const char *lockname; + + if (!conf || !conf->lock || !conf->lockpath) { + /* no locks configured, leave */ + return APR_SUCCESS; + } + if (bb) { + apr_bucket *e; + int eos_found = 0; + for (e = APR_BRIGADE_FIRST(bb); + e != APR_BRIGADE_SENTINEL(bb); + e = APR_BUCKET_NEXT(e)) + { + if (APR_BUCKET_IS_EOS(e)) { + eos_found = 1; + break; + } + } + if (!eos_found) { + /* no eos found in brigade, don't delete anything just yet, + * we are not done. 
+ */ + return APR_SUCCESS; + } + } + apr_pool_userdata_get(&dummy, CACHE_LOCKFILE_KEY, r->pool); + if (dummy) { + return apr_file_close((apr_file_t *)dummy); + } + apr_pool_userdata_get(&dummy, CACHE_LOCKNAME_KEY, r->pool); + lockname = (const char *)dummy; + if (!lockname) { + const char *path; + char dir[5]; + + /* create the key if it doesn't exist */ + if (!key) { + cache_generate_key(r, r->pool, &key); + } + + /* create a hashed filename from the key, and save it for later */ + lockname = ap_cache_generate_name(r->pool, 0, 0, key); + + /* lock files represent discrete just-went-stale URLs "in flight", so + * we support a simple two level directory structure, more is overkill. + */ + dir[0] = '/'; + dir[1] = lockname[0]; + dir[2] = '/'; + dir[3] = lockname[1]; + dir[4] = 0; + + lockname = apr_pstrcat(r->pool, conf->lockpath, dir, "/", lockname, NULL); + } + return apr_file_remove(lockname, r->pool); +} + CACHE_DECLARE(int) ap_cache_check_freshness(cache_handle_t *h, request_rec *r) { + apr_status_t status; apr_int64_t age, maxage_req, maxage_cresp, maxage, smaxage, maxstale; apr_int64_t minfresh; const char *cc_cresp, *cc_req; @@ -359,7 +527,7 @@ CACHE_DECLARE(int) ap_cache_check_freshness(cache_handle_t *h, } /* * If none of Expires, Cache-Control: max-age, or Cache-Control: - * s-maxage appears in the response, and the respose header age + * s-maxage appears in the response, and the response header age * calculated is more than 24 hours add the warning 113 */ if ((maxage_cresp == -1) && (smaxage == -1) && @@ -378,7 +546,64 @@ CACHE_DECLARE(int) ap_cache_check_freshness(cache_handle_t *h, return 1; /* Cache object is fresh (enough) */ } - return 0; /* Cache object is stale */ + /* + * At this point we are stale, but: if we are under load, we may let + * a significant number of stale requests through before the first + * stale request successfully revalidates itself, causing a sudden + * unexpected thundering herd which in turn brings angst and drama. 
+ * + * So. + * + * We want the first stale request to go through as normal. But the + * second and subsequent request, we must pretend to be fresh until + * the first request comes back with either new content or confirmation + * that the stale content is still fresh. + * + * To achieve this, we create a very simple file based lock based on + * the key of the cached object. We attempt to open the lock file with + * exclusive write access. If we succeed, woohoo! we're first, and we + * follow the stale path to the backend server. If we fail, oh well, + * we follow the fresh path, and avoid being a thundering herd. + * + * The lock lives only as long as the stale request that went on ahead. + * If the request succeeds, the lock is deleted. If the request fails, + * the lock is deleted, and another request gets to make a new lock + * and try again. + * + * At any time, a request marked "no-cache" will force a refresh, + * ignoring the lock, ensuring an extended lockout is impossible. + * + * A lock that exceeds a maximum age will be deleted, and another + * request gets to make a new lock and try again. 
+ */ + status = ap_cache_try_lock(conf, r, (char *)h->cache_obj->key); + if (APR_SUCCESS == status) { + /* we obtained a lock, follow the stale path */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "Cache lock obtained for stale cached URL, " + "revalidating entry: %s", + r->unparsed_uri); + return 0; + } + else if (APR_EEXIST == status) { + /* lock already exists, return stale data anyway, with a warning */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "Cache already locked for stale cached URL, " + "pretend it is fresh: %s", + r->unparsed_uri); + apr_table_merge(h->resp_hdrs, "Warning", + "110 Response is stale"); + return 1; + } + else { + /* some other error occurred, just treat the object as stale */ + ap_log_error(APLOG_MARK, APLOG_DEBUG, status, r->server, + "Attempt to obtain a cache lock for stale " + "cached URL failed, revalidating entry anyway: %s", + r->unparsed_uri); + return 0; + } + } /* @@ -578,7 +803,7 @@ CACHE_DECLARE(char *)ap_cache_generate_name(apr_pool_t *p, int dirlevels, } /* - * Create a new table consisting of those elements from an + * Create a new table consisting of those elements from an * headers table that are allowed to be stored in a cache. */ CACHE_DECLARE(apr_table_t *)ap_cache_cacheable_headers(apr_pool_t *pool, diff --git a/modules/cache/mod_cache.c b/modules/cache/mod_cache.c index 677a9dad02..e0d6f5547f 100644 --- a/modules/cache/mod_cache.c +++ b/modules/cache/mod_cache.c @@ -111,41 +111,58 @@ static int cache_url_handler(request_rec *r, int lookup) if (rv != OK) { if (rv == DECLINED) { if (!lookup) { - - /* - * Add cache_save filter to cache this request. Choose - * the correct filter by checking if we are a subrequest - * or not. + char *key = NULL; + + /* try to obtain a cache lock at this point. if we succeed, + * we are the first to try and cache this url. 
if we fail, + * it means someone else is already trying to cache this + * url, and we should just let the request through to the + * backend without any attempt to cache. this stops + * duplicated simultaneous attempts to cache an entity. */ - if (r->main) { - ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, - r->server, - "Adding CACHE_SAVE_SUBREQ filter for %s", - r->uri); - ap_add_output_filter_handle(cache_save_subreq_filter_handle, - NULL, r, r->connection); + rv = ap_cache_try_lock(conf, r, NULL); + if (APR_SUCCESS == rv) { + + /* + * Add cache_save filter to cache this request. Choose + * the correct filter by checking if we are a subrequest + * or not. + */ + if (r->main) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, + r->server, + "Adding CACHE_SAVE_SUBREQ filter for %s", + r->uri); + ap_add_output_filter_handle(cache_save_subreq_filter_handle, + NULL, r, r->connection); + } + else { + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, + r->server, "Adding CACHE_SAVE filter for %s", + r->uri); + ap_add_output_filter_handle(cache_save_filter_handle, + NULL, r, r->connection); + } + + ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, + "Adding CACHE_REMOVE_URL filter for %s", + r->uri); + + /* Add cache_remove_url filter to this request to remove a + * stale cache entry if needed. Also put the current cache + * request rec in the filter context, as the request that + * is available later during running the filter maybe + * different due to an internal redirect. 
+ */ + cache->remove_url_filter = + ap_add_output_filter_handle(cache_remove_url_filter_handle, + cache, r, r->connection); } else { ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, - r->server, "Adding CACHE_SAVE filter for %s", - r->uri); - ap_add_output_filter_handle(cache_save_filter_handle, - NULL, r, r->connection); + r->server, "Cache locked for url, not caching " + "response: %s", r->uri); } - - ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, - "Adding CACHE_REMOVE_URL filter for %s", - r->uri); - - /* Add cache_remove_url filter to this request to remove a - * stale cache entry if needed. Also put the current cache - * request rec in the filter context, as the request that - * is available later during running the filter maybe - * different due to an internal redirect. - */ - cache->remove_url_filter = - ap_add_output_filter_handle(cache_remove_url_filter_handle, - cache, r, r->connection); } else { if (cache->stale_headers) { @@ -164,7 +181,7 @@ static int cache_url_handler(request_rec *r, int lookup) /* error */ ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, "cache: error returned while checking for cached " - "file by %s cache", cache->provider_name); + "file by '%s' cache", cache->provider_name); } return DECLINED; } @@ -311,6 +328,10 @@ static int cache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) * Check to see if we *can* save this particular response. * If we can, call cache_create_entity() and save the headers and body * Finally, pass the data to the next filter (the network or whatever) + * + * After the various failure cases, the cache lock is proactively removed, so + * that another request is given the opportunity to attempt to cache without + * waiting for a potentially slow client to acknowledge the failure. 
*/ static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) @@ -326,6 +347,7 @@ static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) cache_info *info = NULL; char *reason; apr_pool_t *p; + apr_bucket *e; conf = (cache_server_conf *) ap_get_module_config(r->server->module_config, &cache_module); @@ -368,7 +390,24 @@ static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server, "cache: Cache provider's store_body failed!"); ap_remove_output_filter(f); + + /* give someone else the chance to cache the file */ + ap_cache_remove_lock(conf, r, cache->handle ? + (char *)cache->handle->cache_obj->key : NULL, in); } + + /* proactively remove the lock as soon as we see the eos bucket */ + for (e = APR_BRIGADE_FIRST(in); + e != APR_BRIGADE_SENTINEL(in); + e = APR_BUCKET_NEXT(e)) + { + if (APR_BUCKET_IS_EOS(e)) { + ap_cache_remove_lock(conf, r, cache->handle ? + (char *)cache->handle->cache_obj->key : NULL, in); + break; + } + } + return ap_pass_brigade(f->next, in); } @@ -439,17 +478,17 @@ static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) * telling us to serve the cached copy. */ if (exps != NULL || cc_out != NULL) { - /* We are also allowed to cache any response given that it has a - * valid Expires or Cache Control header. If we find a either of - * those here, we pass request through the rest of the tests. From + /* We are also allowed to cache any response given that it has a + * valid Expires or Cache Control header. If we find a either of + * those here, we pass request through the rest of the tests. From * the RFC: * - * A response received with any other status code (e.g. status - * codes 302 and 307) MUST NOT be returned in a reply to a - * subsequent request unless there are cache-control directives or - * another header(s) that explicitly allow it. 
For example, these - * include the following: an Expires header (section 14.21); a - * "max-age", "s-maxage", "must-revalidate", "proxy-revalidate", + * A response received with any other status code (e.g. status + * codes 302 and 307) MUST NOT be returned in a reply to a + * subsequent request unless there are cache-control directives or + * another header(s) that explicitly allow it. For example, these + * include the following: an Expires header (section 14.21); a + * "max-age", "s-maxage", "must-revalidate", "proxy-revalidate", * "public" or "private" cache-control directive (section 14.9). */ } @@ -538,7 +577,7 @@ static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) "*", NULL)) { reason = "Vary header contains '*'"; } - else if (apr_table_get(r->subprocess_env, "no-cache") != NULL) { + else if (apr_table_get(r->subprocess_env, "no-cache") != NULL) { reason = "environment variable 'no-cache' is set"; } else if (r->no_cache) { @@ -554,6 +593,10 @@ static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) /* remove this filter from the chain */ ap_remove_output_filter(f); + /* remove the lock file unconditionally */ + ap_cache_remove_lock(conf, r, cache->handle ? + (char *)cache->handle->cache_obj->key : NULL, in); + /* ship the data up the stack */ return ap_pass_brigade(f->next, in); } @@ -578,7 +621,6 @@ static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) /* if we don't get the content-length, see if we have all the * buckets and use their length to calculate the size */ - apr_bucket *e; int all_buckets_here=0; int unresolved_length = 0; size=0; @@ -655,6 +697,8 @@ static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) if (rv != OK) { /* Caching layer declined the opportunity to cache the response */ ap_remove_output_filter(f); + ap_cache_remove_lock(conf, r, cache->handle ? 
+ (char *)cache->handle->cache_obj->key : NULL, in); return ap_pass_brigade(f->next, in); } @@ -845,16 +889,23 @@ static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server, "cache: attempt to remove url from cache unsuccessful."); } + } + /* let someone else attempt to cache */ + ap_cache_remove_lock(conf, r, cache->handle ? + (char *)cache->handle->cache_obj->key : NULL, in); + return ap_pass_brigade(f->next, bb); } - if(rv != APR_SUCCESS) { + if (rv != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server, "cache: store_headers failed"); - ap_remove_output_filter(f); + ap_remove_output_filter(f); + ap_cache_remove_lock(conf, r, cache->handle ? + (char *)cache->handle->cache_obj->key : NULL, in); return ap_pass_brigade(f->next, in); } @@ -863,6 +914,21 @@ static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in) ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server, "cache: store_body failed"); ap_remove_output_filter(f); + ap_cache_remove_lock(conf, r, cache->handle ? + (char *)cache->handle->cache_obj->key : NULL, in); + return ap_pass_brigade(f->next, in); + } + + /* proactively remove the lock as soon as we see the eos bucket */ + for (e = APR_BRIGADE_FIRST(in); + e != APR_BRIGADE_SENTINEL(in); + e = APR_BUCKET_NEXT(e)) + { + if (APR_BUCKET_IS_EOS(e)) { + ap_cache_remove_lock(conf, r, cache->handle ? 
+ (char *)cache->handle->cache_obj->key : NULL, in); + break; + } } return ap_pass_brigade(f->next, in); @@ -908,6 +974,7 @@ static int cache_remove_url_filter(ap_filter_t *f, apr_bucket_brigade *in) ap_remove_output_filter(f); return ap_pass_brigade(f->next, in); } + /* Now remove this cache entry from the cache */ cache_remove_url(cache, r->pool); @@ -921,6 +988,7 @@ static int cache_remove_url_filter(ap_filter_t *f, apr_bucket_brigade *in) static void * create_cache_config(apr_pool_t *p, server_rec *s) { + const char *tmppath; cache_server_conf *ps = apr_pcalloc(p, sizeof(cache_server_conf)); /* array of URL prefixes for which caching is enabled */ @@ -955,6 +1023,13 @@ static void * create_cache_config(apr_pool_t *p, server_rec *s) /* array of identifiers that should not be used for key calculation */ ps->ignore_session_id = apr_array_make(p, 10, sizeof(char *)); ps->ignore_session_id_set = CACHE_IGNORE_SESSION_ID_UNSET; + ps->lock = 0; /* thundering herd lock defaults to off */ + ps->lock_set = 0; + apr_temp_dir_get(&tmppath, p); + if (tmppath) { + ps->lockpath = apr_pstrcat(p, tmppath, DEFAULT_CACHE_LOCKPATH, NULL); + } + ps->lockmaxage = apr_time_from_sec(DEFAULT_CACHE_MAXAGE); return ps; } @@ -1009,6 +1084,18 @@ static void * merge_cache_config(apr_pool_t *p, void *basev, void *overridesv) (overrides->ignore_session_id_set == CACHE_IGNORE_SESSION_ID_UNSET) ? base->ignore_session_id : overrides->ignore_session_id; + ps->lock = + (overrides->lock_set == 0) + ? base->lock + : overrides->lock; + ps->lockpath = + (overrides->lockpath_set == 0) + ? base->lockpath + : overrides->lockpath; + ps->lockmaxage = + (overrides->lockmaxage_set == 0) + ? 
base->lockmaxage + : overrides->lockmaxage; return ps; } static const char *set_cache_ignore_no_last_mod(cmd_parms *parms, void *dummy, @@ -1241,6 +1328,55 @@ static const char *set_cache_ignore_querystring(cmd_parms *parms, void *dummy, return NULL; } +static const char *set_cache_lock(cmd_parms *parms, void *dummy, + int flag) +{ + cache_server_conf *conf; + + conf = + (cache_server_conf *)ap_get_module_config(parms->server->module_config, + &cache_module); + conf->lock = flag; + conf->lock_set = 1; + return NULL; +} + +static const char *set_cache_lock_path(cmd_parms *parms, void *dummy, + const char *arg) +{ + cache_server_conf *conf; + + conf = + (cache_server_conf *)ap_get_module_config(parms->server->module_config, + &cache_module); + + conf->lockpath = ap_server_root_relative(parms->pool, arg); + if (!conf->lockpath) { + return apr_pstrcat(parms->pool, "Invalid CacheLockPath path ", + arg, NULL); + } + conf->lockpath_set = 1; + return NULL; +} + +static const char *set_cache_lock_maxage(cmd_parms *parms, void *dummy, + const char *arg) +{ + cache_server_conf *conf; + apr_int64_t seconds; + + conf = + (cache_server_conf *)ap_get_module_config(parms->server->module_config, + &cache_module); + seconds = apr_atoi64(arg); + if (seconds <= 0) { + return "CacheLockMaxAge value must be a non-zero positive integer"; + } + conf->lockmaxage = apr_time_from_sec(seconds); + conf->lockmaxage_set = 1; + return NULL; +} + static int cache_post_config(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *s) { @@ -1302,6 +1438,15 @@ static const command_rec cache_cmds[] = AP_INIT_TAKE1("CacheLastModifiedFactor", set_cache_factor, NULL, RSRC_CONF, "The factor used to estimate Expires date from " "LastModified date"), + AP_INIT_FLAG("CacheLock", set_cache_lock, + NULL, RSRC_CONF, + "Enable or disable the thundering herd lock."), + AP_INIT_TAKE1("CacheLockPath", set_cache_lock_path, NULL, RSRC_CONF, + "The thundering herd lock path. 
Defaults to the '" + DEFAULT_CACHE_LOCKPATH "' directory in the system " + "temp directory."), + AP_INIT_TAKE1("CacheLockMaxAge", set_cache_lock_maxage, NULL, RSRC_CONF, + "Maximum age of any thundering herd lock."), {NULL} }; diff --git a/modules/cache/mod_cache.h b/modules/cache/mod_cache.h index 6f2fdb4a5e..50577d69dd 100644 --- a/modules/cache/mod_cache.h +++ b/modules/cache/mod_cache.h @@ -24,7 +24,7 @@ */ #ifndef MOD_CACHE_H -#define MOD_CACHE_H +#define MOD_CACHE_H #include "apr_hooks.h" #include "apr.h" @@ -85,8 +85,12 @@ #define DEFAULT_CACHE_MINEXPIRE 0 #define DEFAULT_CACHE_EXPIRE MSEC_ONE_HR #define DEFAULT_CACHE_LMFACTOR (0.1) +#define DEFAULT_CACHE_MAXAGE 5 +#define DEFAULT_CACHE_LOCKPATH "/mod_cache-lock" +#define CACHE_LOCKNAME_KEY "mod_cache-lockname" +#define CACHE_LOCKFILE_KEY "mod_cache-lockfile" -/* Create a set of CACHE_DECLARE(type), CACHE_DECLARE_NONSTD(type) and +/* Create a set of CACHE_DECLARE(type), CACHE_DECLARE_NONSTD(type) and * CACHE_DECLARE_DATA with appropriate export and import tags for the platform */ #if !defined(WIN32) @@ -133,7 +137,7 @@ typedef struct { int factor_set; /** ignore the last-modified header when deciding to cache this request */ int no_last_mod_ignore_set; - int no_last_mod_ignore; + int no_last_mod_ignore; /** ignore client's requests for uncached responses */ int ignorecachecontrol; int ignorecachecontrol_set; @@ -161,21 +165,28 @@ typedef struct { #define CACHE_IGNORE_SESSION_ID_SET 1 #define CACHE_IGNORE_SESSION_ID_UNSET 0 int ignore_session_id_set; + /* thundering herd lock */ + int lock; + int lock_set; + const char *lockpath; + int lockpath_set; + apr_time_t lockmaxage; + int lockmaxage_set; } cache_server_conf; /* cache info information */ typedef struct cache_info cache_info; struct cache_info { - /** - * HTTP status code of the cached entity. Though not neccessarily the - * status code finally issued to the request. + /** + * HTTP status code of the cached entity. 
Though not necessarily the + * status code finally issued to the request. */ - int status; - /** - * the original time corresponding to the 'Date:' header of the request - * served + int status; + /** + * the original time corresponding to the 'Date:' header of the request + * served */ - apr_time_t date; + apr_time_t date; /** a time when the cached entity is due to expire */ apr_time_t expire; /** r->request_time from the same request */ @@ -186,11 +197,11 @@ struct cache_info { /* cache handle information */ -/* XXX TODO On the next structure change/MMN bump, +/* XXX TODO On the next structure change/MMN bump, * count must become an apr_off_t, representing * the potential size of disk cached objects. * Then dig for - * "XXX Bad Temporary Cast - see cache_object_t notes" + * "XXX Bad Temporary Cast - see cache_object_t notes" */ typedef struct cache_object cache_object_t; struct cache_object { @@ -219,7 +230,7 @@ typedef struct { apr_status_t (*store_headers)(cache_handle_t *h, request_rec *r, cache_info *i); apr_status_t (*store_body)(cache_handle_t *h, request_rec *r, apr_bucket_brigade *b); apr_status_t (*recall_headers) (cache_handle_t *h, request_rec *r); - apr_status_t (*recall_body) (cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb); + apr_status_t (*recall_body) (cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb); int (*create_entity) (cache_handle_t *h, request_rec *r, const char *urlkey, apr_off_t len); int (*open_entity) (cache_handle_t *h, request_rec *r, @@ -273,6 +284,45 @@ CACHE_DECLARE(apr_time_t) ap_cache_current_age(cache_info *info, const apr_time_ CACHE_DECLARE(int) ap_cache_check_freshness(cache_handle_t *h, request_rec *r); /** + * Try obtain a cache wide lock on the given cache key. + * + * If we return APR_SUCCESS, we obtained the lock, and we are clear to + * proceed to the backend. 
If we return APR_EEXIST, then the lock is + * already locked, someone else has gone to refresh the backend data + * already, so we must return stale data with a warning in the mean + * time. If we return anything else, then something has gone pear + * shaped, and we allow the request through to the backend regardless. + * + * This lock is created from the request pool, meaning that should + * something go wrong and the lock isn't deleted on return of the + * request headers from the backend for whatever reason, at worst the + * lock will be cleaned up when the request dies or finishes. + * + * If something goes truly bananas and the lock isn't deleted when the + * request dies, the lock will be trashed when its max-age is reached, + * or when a request arrives containing a Cache-Control: no-cache. At + * no point is it possible for this lock to permanently deny access to + * the backend. + */ +CACHE_DECLARE(apr_status_t) ap_cache_try_lock(cache_server_conf *conf, + request_rec *r, char *key); + +/** + * Remove the cache lock, if present. + * + * First, try to close the file handle, whose delete-on-close should + * kill the file. Otherwise, just delete the file by name. + * + * If no lock name has yet been calculated, do the calculation of the + * lock name first before trying to delete the file. + * + * If an optional bucket brigade is passed, the lock will only be + * removed if the bucket brigade contains an EOS bucket. 
+ */ +CACHE_DECLARE(apr_status_t) ap_cache_remove_lock(cache_server_conf *conf, + request_rec *r, char *key, apr_bucket_brigade *bb); + +/** * Merge in cached headers into the response * @param h cache_handle_t * @param r request_rec @@ -284,15 +334,15 @@ CACHE_DECLARE(void) ap_cache_accept_headers(cache_handle_t *h, request_rec *r, CACHE_DECLARE(apr_time_t) ap_cache_hex2usec(const char *x); CACHE_DECLARE(void) ap_cache_usec2hex(apr_time_t j, char *y); -CACHE_DECLARE(char *) ap_cache_generate_name(apr_pool_t *p, int dirlevels, - int dirlength, +CACHE_DECLARE(char *) ap_cache_generate_name(apr_pool_t *p, int dirlevels, + int dirlength, const char *name); CACHE_DECLARE(cache_provider_list *)ap_cache_get_providers(request_rec *r, cache_server_conf *conf, apr_uri_t uri); CACHE_DECLARE(int) ap_cache_liststr(apr_pool_t *p, const char *list, const char *key, char **val); CACHE_DECLARE(const char *)ap_cache_tokstr(apr_pool_t *p, const char *list, const char **str); -/* Create a new table consisting of those elements from an +/* Create a new table consisting of those elements from an * headers table that are allowed to be stored in a cache. */ CACHE_DECLARE(apr_table_t *)ap_cache_cacheable_headers(apr_pool_t *pool, @@ -340,8 +390,8 @@ apr_status_t cache_recall_entity_body(cache_handle_t *h, apr_pool_t *p, apr_buck /* hooks */ -APR_DECLARE_OPTIONAL_FN(apr_status_t, - ap_cache_generate_key, +APR_DECLARE_OPTIONAL_FN(apr_status_t, + ap_cache_generate_key, (request_rec *r, apr_pool_t*p, char**key )); |