Mercurial > public > src > rhodecode
comparison pylons_app/lib/indexers/daemon.py @ 474:28f19fa562df
Updated config files, implemented content index extensions with Whoosh, and fixed the analyzer to match more words.
author | Marcin Kuzminski <marcin@python-works.com> |
---|---|
date | Sat, 28 Aug 2010 14:53:32 +0200 |
parents | 9b67cebe6609 |
children | c59c4d4323e7 |
comparison legend: equal, deleted, inserted, replaced
473:0e8ef6f17203 | 474:28f19fa562df |
---|---|
36 import traceback | 36 import traceback |
37 from pylons_app.config.environment import load_environment | 37 from pylons_app.config.environment import load_environment |
38 from pylons_app.model.hg_model import HgModel | 38 from pylons_app.model.hg_model import HgModel |
39 from whoosh.index import create_in, open_dir | 39 from whoosh.index import create_in, open_dir |
40 from shutil import rmtree | 40 from shutil import rmtree |
41 from pylons_app.lib.indexers import ANALYZER, EXCLUDE_EXTENSIONS, IDX_LOCATION, \ | 41 from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \ |
42 SCHEMA, IDX_NAME | 42 SCHEMA, IDX_NAME |
43 | 43 |
44 import logging | 44 import logging |
45 import logging.config | 45 import logging.config |
46 logging.config.fileConfig(jn(project_path, 'development.ini')) | 46 logging.config.fileConfig(jn(project_path, 'development.ini')) |
68 return index_paths_ | 68 return index_paths_ |
69 | 69 |
70 def add_doc(self, writer, path, repo): | 70 def add_doc(self, writer, path, repo): |
71 """Adding doc to writer""" | 71 """Adding doc to writer""" |
72 | 72 |
73 #we don't won't to read excluded file extensions just index them | 73 ext = unicode(path.split('/')[-1].split('.')[-1].lower()) |
74 if path.split('/')[-1].split('.')[-1].lower() not in EXCLUDE_EXTENSIONS: | 74 #we just index the content of choosen files |
75 if ext in INDEX_EXTENSIONS: | |
76 log.debug(' >> %s [WITH CONTENT]' % path) | |
75 fobj = open(path, 'rb') | 77 fobj = open(path, 'rb') |
76 content = fobj.read() | 78 content = fobj.read() |
77 fobj.close() | 79 fobj.close() |
78 try: | 80 try: |
79 u_content = unicode(content) | 81 u_content = unicode(content) |
80 except UnicodeDecodeError: | 82 except UnicodeDecodeError: |
81 #incase we have a decode error just represent as byte string | 83 #incase we have a decode error just represent as byte string |
82 u_content = unicode(str(content).encode('string_escape')) | 84 u_content = unicode(str(content).encode('string_escape')) |
83 else: | 85 else: |
84 u_content = u'' | 86 log.debug(' >> %s' % path) |
87 #just index file name without it's content | |
88 u_content = u'' | |
89 | |
85 writer.add_document(owner=unicode(repo.contact), | 90 writer.add_document(owner=unicode(repo.contact), |
86 repository=u"%s" % repo.name, | 91 repository=u"%s" % repo.name, |
87 path=u"%s" % path, | 92 path=u"%s" % path, |
88 content=u_content, | 93 content=u_content, |
89 modtime=os.path.getmtime(path)) | 94 modtime=os.path.getmtime(path), |
95 extension=ext) | |
90 | 96 |
91 def build_index(self): | 97 def build_index(self): |
92 if os.path.exists(IDX_LOCATION): | 98 if os.path.exists(IDX_LOCATION): |
99 log.debug('removing previos index') | |
93 rmtree(IDX_LOCATION) | 100 rmtree(IDX_LOCATION) |
94 | 101 |
95 if not os.path.exists(IDX_LOCATION): | 102 if not os.path.exists(IDX_LOCATION): |
96 os.mkdir(IDX_LOCATION) | 103 os.mkdir(IDX_LOCATION) |
97 | 104 |
100 | 107 |
101 for cnt, repo in enumerate(scan_paths(self.repo_location).values()): | 108 for cnt, repo in enumerate(scan_paths(self.repo_location).values()): |
102 log.debug('building index @ %s' % repo.path) | 109 log.debug('building index @ %s' % repo.path) |
103 | 110 |
104 for idx_path in self.get_paths(repo.path): | 111 for idx_path in self.get_paths(repo.path): |
105 log.debug(' >> %s' % idx_path) | |
106 self.add_doc(writer, idx_path, repo) | 112 self.add_doc(writer, idx_path, repo) |
107 writer.commit(merge=True) | 113 writer.commit(merge=True) |
108 | 114 |
109 log.debug('>>> FINISHED BUILDING INDEX <<<') | 115 log.debug('>>> FINISHED BUILDING INDEX <<<') |
110 | 116 |
168 self.build_index() | 174 self.build_index() |
169 else: | 175 else: |
170 self.update_index() | 176 self.update_index() |
171 | 177 |
172 if __name__ == "__main__": | 178 if __name__ == "__main__": |
173 repo_location = '/home/marcink/python_workspace_dirty/*' | 179 repo_location = '/home/marcink/hg_repos/*' |
174 | 180 full_index = True # False means looking just for changes |
175 try: | 181 try: |
176 l = DaemonLock() | 182 l = DaemonLock() |
177 WhooshIndexingDaemon(repo_location=repo_location).run(full_index=True) | 183 WhooshIndexingDaemon(repo_location=repo_location)\ |
184 .run(full_index=full_index) | |
178 l.release() | 185 l.release() |
179 except LockHeld: | 186 except LockHeld: |
180 sys.exit(1) | 187 sys.exit(1) |
181 | 188 |