changeset 5983:9bd355d893f4
cfg.url_prefix_action_check = True to strictly check whether action URLs look like we generate them
url_prefix_action was introduced so that robots.txt can be used against bots
that make unwanted requests for all sorts of actions, just because they found a
link to them in the menu_bar (or somewhere else).

Problem: even if url_prefix_action was in use (e.g. == 'action'), we still
accepted URLs without that prefix (e.g. cached by some bot or search engine
from earlier times, or external links). Answering those requests can take
a lot of CPU time, and we cannot get rid of them without the check that
was implemented with this changeset.
| author | Thomas Waldmann <tw AT waldmann-edv DOT de> |
| --- | --- |
| date | Sun, 07 Apr 2013 00:59:26 +0200 |
| parents | 897111701cf8 |
| children | e7136d5731df |
| files | MoinMoin/config/multiconfig.py MoinMoin/wsgiapp.py docs/CHANGES |
| diffstat | 3 files changed, 26 insertions(+), 4 deletions(-) |
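The idea behind the change can be illustrated with a small sketch (simplified and hypothetical, not the code from this changeset; the real implementation is in MoinMoin/wsgiapp.py in the diff below): with url_prefix_action = 'action' and the strict check enabled, a request is only acceptable if either the path carries the /action/<name>/ prefix matching the requested action, or no prefix is present and the action is the default 'show'.

    # Simplified, hypothetical sketch of the strict check added by this
    # changeset; see the actual change in MoinMoin/wsgiapp.py below.
    def is_valid_action_path(path, action_name, prefix='action'):
        wanted = '/%s/' % prefix  # e.g. '/action/'
        if path.startswith(wanted):
            # prefixed URL: the action name in the path must match the requested action
            path_action = path[len(wanted):].split('/', 1)[0]
            return path_action == action_name
        # unprefixed URL: only the default 'show' action is acceptable
        return action_name == 'show'

    # is_valid_action_path('/action/info/SomePage', 'info')  -> True
    # is_valid_action_path('/SomePage', 'info')               -> False (answer with 404)
    # is_valid_action_path('/SomePage', 'show')               -> True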
--- a/MoinMoin/config/multiconfig.py Sat Apr 06 23:26:08 2013 +0200
+++ b/MoinMoin/config/multiconfig.py Sun Apr 07 00:59:26 2013 +0200
@@ -1032,6 +1032,8 @@
     ('url_prefix_action', None,
      "Use 'action' to enable action URL generation to be compatible with robots.txt. It will generate .../action/info/PageName?action=info then. Recommended for internet wikis."),
+    ('url_prefix_action_check', False,
+     "Do a strict check whether the URL pathes for actions look like we generate them (otherwise do a 404 response)."),
     ('notification_bot_uri', None,
      "URI of the Jabber notification bot."),
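For a wiki admin, enabling the new behaviour means setting both options in the wiki configuration. The following fragment is only a sketch (the sitename value is a placeholder; it assumes the usual MoinMoin 1.9 pattern of subclassing multiconfig.DefaultConfig in wikiconfig.py):

    # Sketch of a wikiconfig.py fragment; assumes the standard MoinMoin 1.9 layout.
    from MoinMoin.config import multiconfig

    class Config(multiconfig.DefaultConfig):
        sitename = u'My Wiki'           # placeholder, unrelated to this change
        url_prefix_action = 'action'    # generate .../action/info/PageName?action=info URLs
        url_prefix_action_check = True  # 404 action URLs that do not look like that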
--- a/MoinMoin/wsgiapp.py Sat Apr 06 23:26:08 2013 +0200
+++ b/MoinMoin/wsgiapp.py Sun Apr 07 00:59:26 2013 +0200
@@ -12,7 +12,7 @@
 logging = log.getLogger(__name__)
 
 from MoinMoin.web.contexts import AllContext, Context, XMLRPCContext
-from MoinMoin.web.exceptions import HTTPException
+from MoinMoin.web.exceptions import HTTPException, abort
 from MoinMoin.web.request import Request, MoinMoinFinish, HeaderSet
 from MoinMoin.web.utils import check_forbidden, check_surge_protect, fatal_response, \
     redirect_last_visited
@@ -94,8 +94,12 @@
     context.finish()
     context.clock.stop('run')
 
-def remove_prefix(path, prefix=None):
-    """ Remove an url prefix from the path info and return shortened path. """
+def remove_prefix(path, action_name, prefix, check_prefix):
+    """
+    Remove an url prefix from the path info and return shortened path.
+
+    If check_prefix is True, we do some consistency checks and 404 invalid URLs.
+    """
     # we can have all action URLs like this: /action/ActionName/PageName?action=ActionName&...
     # this is just for robots.txt being able to forbid them for crawlers
     if prefix is not None:
@@ -105,13 +109,21 @@
             path = path[len(prefix):]
             action, path = (path.split('/', 1) + ['', ''])[:2]
             path = '/' + path
+            if check_prefix and action != action_name:
+                # inconsistency found (action in querystr != action in path)
+                abort(404)
+        elif check_prefix and action_name != 'show':
+            # invalid: a non-default (non-show) action, but the prefix is not present
+            abort(404)
     return path
 
 def dispatch(request, context, action_name='show'):
     cfg = context.cfg
 
     # The last component in path_info is the page name, if any
-    path = remove_prefix(request.path, cfg.url_prefix_action)
+    path = remove_prefix(request.path, action_name,
+                         cfg.url_prefix_action,
+                         cfg.url_prefix_action_check)
 
     if path.startswith('/'):
         pagename = wikiutil.normalize_pagename(path, cfg)
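To see what the new parameters do in practice, here are some illustrative calls (with hypothetical page names), assuming url_prefix_action = 'action' and url_prefix_action_check = True:

    # remove_prefix('/action/info/FrontPage', 'info', 'action', True)
    #     -> '/FrontPage'   (prefix present, action in path matches; prefix is stripped)
    # remove_prefix('/action/info/FrontPage', 'edit', 'action', True)
    #     -> abort(404)     (action in path != action in query string)
    # remove_prefix('/FrontPage', 'info', 'action', True)
    #     -> abort(404)     (non-default action requested without the /action/ prefix)
    # remove_prefix('/FrontPage', 'show', 'action', True)
    #     -> '/FrontPage'   (plain page view still works without the prefix)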
--- a/docs/CHANGES Sat Apr 06 23:26:08 2013 +0200
+++ b/docs/CHANGES Sun Apr 07 00:59:26 2013 +0200
@@ -34,6 +34,14 @@
     Output encoding is utf-8, columns are in this order:
     time, event, username, ip, wikiname, pagename, url, referrer, ua
     time: UNIX timestamp (float)
+  * Added strict checking for action URLs to avoid load caused by bots:
+    url_prefix_action = 'action'
+    url_prefix_action_check = True  # New, default is False
+    Note: action URLs generated without these settings will not work any
+    longer and (with url_prefix_action_check = True) will get rejected by
+    MoinMoin with a 404 "Not Found" response.
+    To disallow actions for the bots, add this to /robots.txt:
+    Disallow: /action/
 
 Version 1.9.7:
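A matching robots.txt could then look like the sketch below; only the Disallow line comes from the CHANGES entry above, the wildcard User-agent line is simply the usual way to address all crawlers:

    User-agent: *
    Disallow: /action/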