Mercurial > public > mercurial-scm > hg
comparison mercurial/pathencode.c @ 18432:39954be8ece7
pathencode: implement the "mangling" part of hashed encoding in C
This will be used by an upcoming patch.
author | Bryan O'Sullivan <bryano@fb.com> |
---|---|
date | Wed, 12 Dec 2012 13:09:35 -0800 |
parents | 3aa9b2136593 |
children | 79f4a2a8f248 |
comparison
equal
deleted
inserted
replaced
18431:3aa9b2136593 | 18432:39954be8ece7 |
---|---|
521 path, len); | 521 path, len); |
522 | 522 |
523 return ret; | 523 return ret; |
524 } | 524 } |
525 | 525 |
526 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20]) | |
527 { | |
528 static const Py_ssize_t dirprefixlen = 8; | |
529 static const Py_ssize_t maxshortdirslen = 68; | |
530 char *dest; | |
531 PyObject *ret; | |
532 | |
533 Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1; | |
534 Py_ssize_t destsize, destlen = 0, slop, used; | |
535 | |
536 while (lastslash >= 0 && src[lastslash] != '/') { | |
537 if (src[lastslash] == '.' && lastdot == -1) | |
538 lastdot = lastslash; | |
539 lastslash--; | |
540 } | |
541 | |
542 #if 0 | |
543 /* All paths should end in a suffix of ".i" or ".d". | |
544 Unfortunately, the file names in test-hybridencode.py | |
545 violate this rule. */ | |
546 if (lastdot != len - 3) { | |
547 PyErr_SetString(PyExc_ValueError, | |
548 "suffix missing or wrong length"); | |
549 return NULL; | |
550 } | |
551 #endif | |
552 | |
553 /* If src contains a suffix, we will append it to the end of | |
554 the new string, so make room. */ | |
555 destsize = 120; | |
556 if (lastdot >= 0) | |
557 destsize += len - lastdot - 1; | |
558 | |
559 ret = PyString_FromStringAndSize(NULL, destsize); | |
560 if (ret == NULL) | |
561 return NULL; | |
562 | |
563 dest = PyString_AS_STRING(ret); | |
564 memcopy(dest, &destlen, destsize, "dh/", 3); | |
565 | |
566 /* Copy up to dirprefixlen bytes of each path component, up to | |
567 a limit of maxshortdirslen bytes. */ | |
568 for (i = d = p = 0; i < lastslash; i++, p++) { | |
569 if (src[i] == '/') { | |
570 char d = dest[destlen - 1]; | |
571 /* After truncation, a directory name may end | |
572 in a space or dot, which are unportable. */ | |
573 if (d == '.' || d == ' ') | |
574 dest[destlen - 1] = '_'; | |
575 if (destlen > maxshortdirslen) | |
576 break; | |
577 charcopy(dest, &destlen, destsize, src[i]); | |
578 p = -1; | |
579 } | |
580 else if (p < dirprefixlen) | |
581 charcopy(dest, &destlen, destsize, src[i]); | |
582 } | |
583 | |
584 /* Rewind to just before the last slash copied. */ | |
585 if (destlen > maxshortdirslen + 3) | |
586 do { | |
587 destlen--; | |
588 } while (destlen > 0 && dest[destlen] != '/'); | |
589 | |
590 if (destlen > 3) { | |
591 if (lastslash > 0) { | |
592 char d = dest[destlen - 1]; | |
593 /* The last directory component may be | |
594 truncated, so make it safe. */ | |
595 if (d == '.' || d == ' ') | |
596 dest[destlen - 1] = '_'; | |
597 } | |
598 | |
599 charcopy(dest, &destlen, destsize, '/'); | |
600 } | |
601 | |
602 /* Add a prefix of the original file's name. Its length | |
603 depends on the number of bytes left after accounting for | |
604 hash and suffix. */ | |
605 used = destlen + 40; | |
606 if (lastdot >= 0) | |
607 used += len - lastdot - 1; | |
608 slop = maxstorepathlen - used; | |
609 if (slop > 0) { | |
610 Py_ssize_t basenamelen = | |
611 lastslash >= 0 ? len - lastslash - 2 : len - 1; | |
612 | |
613 if (basenamelen > slop) | |
614 basenamelen = slop; | |
615 if (basenamelen > 0) | |
616 memcopy(dest, &destlen, destsize, &src[lastslash + 1], | |
617 basenamelen); | |
618 } | |
619 | |
620 /* Add hash and suffix. */ | |
621 for (i = 0; i < 20; i++) | |
622 hexencode(dest, &destlen, destsize, sha[i]); | |
623 | |
624 if (lastdot >= 0) | |
625 memcopy(dest, &destlen, destsize, &src[lastdot], | |
626 len - lastdot - 1); | |
627 | |
628 PyString_GET_SIZE(ret) = destlen; | |
629 | |
630 return ret; | |
631 } | |
632 | |
526 /* | 633 /* |
527 * Avoiding a trip through Python would improve performance by 50%, | 634 * Avoiding a trip through Python would improve performance by 50%, |
528 * but we don't encounter enough long names to be worth the code. | 635 * but we don't encounter enough long names to be worth the code. |
529 */ | 636 */ |
530 static int sha1hash(char hash[20], const char *str, Py_ssize_t len) | 637 static int sha1hash(char hash[20], const char *str, Py_ssize_t len) |