comparison mercurial/pathencode.c @ 18432:39954be8ece7

pathencode: implement the "mangling" part of hashed encoding in C. This will be used by an upcoming patch.
author Bryan O'Sullivan <bryano@fb.com>
date Wed, 12 Dec 2012 13:09:35 -0800
parents 3aa9b2136593
children 79f4a2a8f248
comparison
equal deleted inserted replaced
18431:3aa9b2136593 18432:39954be8ece7
521 path, len); 521 path, len);
522 522
523 return ret; 523 return ret;
524 } 524 }
525 525
526 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
527 {
528 static const Py_ssize_t dirprefixlen = 8;
529 static const Py_ssize_t maxshortdirslen = 68;
530 char *dest;
531 PyObject *ret;
532
533 Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
534 Py_ssize_t destsize, destlen = 0, slop, used;
535
536 while (lastslash >= 0 && src[lastslash] != '/') {
537 if (src[lastslash] == '.' && lastdot == -1)
538 lastdot = lastslash;
539 lastslash--;
540 }
541
542 #if 0
543 /* All paths should end in a suffix of ".i" or ".d".
544 Unfortunately, the file names in test-hybridencode.py
545 violate this rule. */
546 if (lastdot != len - 3) {
547 PyErr_SetString(PyExc_ValueError,
548 "suffix missing or wrong length");
549 return NULL;
550 }
551 #endif
552
553 /* If src contains a suffix, we will append it to the end of
554 the new string, so make room. */
555 destsize = 120;
556 if (lastdot >= 0)
557 destsize += len - lastdot - 1;
558
559 ret = PyString_FromStringAndSize(NULL, destsize);
560 if (ret == NULL)
561 return NULL;
562
563 dest = PyString_AS_STRING(ret);
564 memcopy(dest, &destlen, destsize, "dh/", 3);
565
566 /* Copy up to dirprefixlen bytes of each path component, up to
567 a limit of maxshortdirslen bytes. */
568 for (i = d = p = 0; i < lastslash; i++, p++) {
569 if (src[i] == '/') {
570 char d = dest[destlen - 1];
571 /* After truncation, a directory name may end
572 in a space or dot, which are unportable. */
573 if (d == '.' || d == ' ')
574 dest[destlen - 1] = '_';
575 if (destlen > maxshortdirslen)
576 break;
577 charcopy(dest, &destlen, destsize, src[i]);
578 p = -1;
579 }
580 else if (p < dirprefixlen)
581 charcopy(dest, &destlen, destsize, src[i]);
582 }
583
584 /* Rewind to just before the last slash copied. */
585 if (destlen > maxshortdirslen + 3)
586 do {
587 destlen--;
588 } while (destlen > 0 && dest[destlen] != '/');
589
590 if (destlen > 3) {
591 if (lastslash > 0) {
592 char d = dest[destlen - 1];
593 /* The last directory component may be
594 truncated, so make it safe. */
595 if (d == '.' || d == ' ')
596 dest[destlen - 1] = '_';
597 }
598
599 charcopy(dest, &destlen, destsize, '/');
600 }
601
602 /* Add a prefix of the original file's name. Its length
603 depends on the number of bytes left after accounting for
604 hash and suffix. */
605 used = destlen + 40;
606 if (lastdot >= 0)
607 used += len - lastdot - 1;
608 slop = maxstorepathlen - used;
609 if (slop > 0) {
610 Py_ssize_t basenamelen =
611 lastslash >= 0 ? len - lastslash - 2 : len - 1;
612
613 if (basenamelen > slop)
614 basenamelen = slop;
615 if (basenamelen > 0)
616 memcopy(dest, &destlen, destsize, &src[lastslash + 1],
617 basenamelen);
618 }
619
620 /* Add hash and suffix. */
621 for (i = 0; i < 20; i++)
622 hexencode(dest, &destlen, destsize, sha[i]);
623
624 if (lastdot >= 0)
625 memcopy(dest, &destlen, destsize, &src[lastdot],
626 len - lastdot - 1);
627
628 PyString_GET_SIZE(ret) = destlen;
629
630 return ret;
631 }
632
526 /* 633 /*
527 * Avoiding a trip through Python would improve performance by 50%, 634 * Avoiding a trip through Python would improve performance by 50%,
528 * but we don't encounter enough long names to be worth the code. 635 * but we don't encounter enough long names to be worth the code.
529 */ 636 */
530 static int sha1hash(char hash[20], const char *str, Py_ssize_t len) 637 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)