Logo Search packages:      
Sourcecode: zeitgeist-extensions version File versions  Download package

def fts::Indexer::_index_uri (   self,
  uri 
) [private]

Index `uri` into the document currently set on self._tokenizer

Definition at line 348 of file fts.py.

def _index_uri (self, uri):
            """
            Index `uri` into the document currently set on self._tokenizer

            The URI is split with self._split_uri() into (scheme, host, path)
            and indexed according to its scheme:

             * "file://" or no scheme: the last path component is indexed
               with weight 5 (once plain and once with the "N" argument),
               then every parent directory name is indexed with the weight
               divided by 1.5 for each level further up.
             * "mailto:": the part of the host before the first "@" is
               indexed with weight 6; the remaining "@"-separated parts,
               joined by spaces, are indexed with weight 1.
             * anything else: the last path component (weight 5), the
               remaining path (weight 1) and the host (weight 2) are
               indexed, each only when non-empty.

            NOTE(review): the third argument passed to index_text ("N", "S")
            looks like a term prefix -- confirm against the tokenizer API.
            """
            # File URIs and paths are indexed in one way, and all other,
            # usually web URIs, are indexed in another way because there may
            # be domain name etc. in there we want to rank differently
            scheme, host, path = self._split_uri (uri)
            if scheme == "file://" or not scheme:
                  path, name = os.path.split(path)
                  self._tokenizer.index_text(name, 5)
                  self._tokenizer.index_text(name, 5, "N")
                  
                  # Index parent names with descending weight
                  weight = 5
                  while path and name:
                        weight = weight / 1.5
                        path, name = os.path.split(path)
                        self._tokenizer.index_text(name, weight)
                  
            elif scheme == "mailto:":
                  tokens = host.split("@")
                  name = tokens[0]
                  self._tokenizer.index_text(name, 6)
                  if len(tokens) > 1:
                        # Everything after the first "@" (normally the mail
                        # domain) is indexed with a low weight. The original
                        # code subscripted the bound method str.join here,
                        # which raised TypeError for every address with "@".
                        self._tokenizer.index_text(" ".join(tokens[1:]), 1)
            else:
                  path, name = os.path.split(path)
                  if name:
                        self._tokenizer.index_text(name, 5)
                        self._tokenizer.index_text(name, 5, "N")
                  if path:
                        self._tokenizer.index_text(path, 1)
                        self._tokenizer.index_text(path, 1, "N")
                  if host:
                        self._tokenizer.index_text(host, 2)
                        self._tokenizer.index_text(host, 2, "N")
                        self._tokenizer.index_text(host, 2, "S")
      
      def _index_text (self, text):


Generated by  Doxygen 1.6.0   Back to index