Mercurial > public > src > rhodecode
changeset 2745:2ad50c44b025 beta
When indexing changesets, use the raw_id to locate the point from
which to start indexing, rather than the revision number, which can be unreliable.
author | Indra Talip <indra.talip@gmail.com> |
---|---|
date | Sat, 21 Jul 2012 08:20:32 +0200 |
parents | 88b0e82bcba4 |
children | bed929b24f63 |
files | rhodecode/lib/indexers/__init__.py rhodecode/lib/indexers/daemon.py rhodecode/tests/functional/test_search.py |
diffstat | 3 files changed, 28 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/rhodecode/lib/indexers/__init__.py Sat Jul 21 06:01:32 2012 +0200 +++ b/rhodecode/lib/indexers/__init__.py Sat Jul 21 08:20:32 2012 +0200 @@ -74,7 +74,6 @@ CHGSETS_SCHEMA = Schema( raw_id=ID(unique=True, stored=True), - revision=NUMERIC(unique=True, stored=True), last=BOOLEAN(), owner=TEXT(), repository=ID(unique=True, stored=True),
--- a/rhodecode/lib/indexers/daemon.py Sat Jul 21 06:01:32 2012 +0200 +++ b/rhodecode/lib/indexers/daemon.py Sat Jul 21 08:20:32 2012 +0200 @@ -168,23 +168,34 @@ ) return indexed, indexed_w_content - def index_changesets(self, writer, repo_name, repo, start_rev=0): + def index_changesets(self, writer, repo_name, repo, start_rev=None): """ Add all changeset in the vcs repo starting at start_rev to the index writer + + :param writer: the whoosh index writer to add to + :param repo_name: name of the repository from whence the + changeset originates including the repository group + :param repo: the vcs repository instance to index changesets for, + the presumption is the repo has changesets to index + :param start_rev=None: the full sha id to start indexing from + if start_rev is None then index from the first changeset in + the repo """ - log.debug('indexing changesets in %s[%d:]' % (repo_name, start_rev)) + if start_rev is None: + start_rev = repo[0].raw_id + + log.debug('indexing changesets in %s starting at rev: %s' % (repo_name, start_rev)) indexed=0 - for cs in repo[start_rev:]: + for cs in repo.get_changesets(start=start_rev): writer.add_document( raw_id=unicode(cs.raw_id), owner=unicode(repo.contact), repository=safe_unicode(repo_name), author=cs.author, message=cs.message, - revision=cs.revision, last=cs.last, added=u' '.join([node.path for node in cs.added]).lower(), removed=u' '.join([node.path for node in cs.removed]).lower(), @@ -214,21 +225,27 @@ try: for repo_name, repo in self.repo_paths.items(): # skip indexing if there aren't any revs in the repo - revs = repo.revisions - if len(revs) < 1: + num_of_revs = len(repo) + if num_of_revs < 1: continue qp = QueryParser('repository', schema=CHGSETS_SCHEMA) q = qp.parse(u"last:t AND %s" % repo_name) - results = searcher.search(q, sortedby='revision') + results = searcher.search(q) + # default to scanning the entire repo last_rev = 0 + start_id = None + if len(results) > 0: - last_rev = results[0]['revision'] + 
# assuming that there is only one result, if not this + # may require a full re-index. + start_id = results[0]['raw_id'] + last_rev = repo.get_changeset(revision=start_id).revision # there are new changesets to index or a new repo to index - if last_rev == 0 or len(revs) > last_rev + 1: + if last_rev == 0 or num_of_revs > last_rev + 1: # delete the docs in the index for the previous last changeset(s) for hit in results: q = qp.parse(u"last:t AND %s AND raw_id:%s" % @@ -236,7 +253,7 @@ writer.delete_by_query(q) # index from the previous last changeset + all new ones - self.index_changesets(writer, repo_name, repo, last_rev) + self.index_changesets(writer, repo_name, repo, start_id) writer_is_dirty = True finally:
--- a/rhodecode/tests/functional/test_search.py Sat Jul 21 06:01:32 2012 +0200 +++ b/rhodecode/tests/functional/test_search.py Sat Jul 21 08:20:32 2012 +0200 @@ -72,7 +72,7 @@ def test_search_author(self): self.log_user() response = self.app.get(url(controller='search', action='index'), - {'q': 'author:marcin@python-blog.com revision:0', + {'q': 'author:marcin@python-blog.com raw_id:b986218ba1c9b0d6a259fac9b050b1724ed8e545', 'type': 'commit'}) response.mustcontain('1 results')