Mercurial > public > mercurial-scm > hg-stable
comparison mercurial/utils/stringutil.py @ 47200:b0e92313107e
parselist: move the function from config to stringutil
We move the function into a lower-level module to avoid an import cycle. It moves next to
`parsebool`, which had to migrate for the same reason.
Differential Revision: https://phab.mercurial-scm.org/D10449
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Fri, 16 Apr 2021 01:18:28 +0200 |
parents | d4ba4d51f85f |
children | dcdecec401ca |
comparison
equal
deleted
inserted
replaced
47199:353718f741a8 | 47200:b0e92313107e |
---|---|
866 If s is not a valid boolean, returns None. | 866 If s is not a valid boolean, returns None. |
867 """ | 867 """ |
868 return _booleans.get(s.lower(), None) | 868 return _booleans.get(s.lower(), None) |
869 | 869 |
870 | 870 |
871 def parselist(value): | |
872 """parse a configuration value as a list of comma/space separated strings | |
873 | |
874 >>> parselist(b'this,is "a small" ,test') | |
875 ['this', 'is', 'a small', 'test'] | |
876 """ | |
877 | |
878 def _parse_plain(parts, s, offset): | |
879 whitespace = False | |
880 while offset < len(s) and ( | |
881 s[offset : offset + 1].isspace() or s[offset : offset + 1] == b',' | |
882 ): | |
883 whitespace = True | |
884 offset += 1 | |
885 if offset >= len(s): | |
886 return None, parts, offset | |
887 if whitespace: | |
888 parts.append(b'') | |
889 if s[offset : offset + 1] == b'"' and not parts[-1]: | |
890 return _parse_quote, parts, offset + 1 | |
891 elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\': | |
892 parts[-1] = parts[-1][:-1] + s[offset : offset + 1] | |
893 return _parse_plain, parts, offset + 1 | |
894 parts[-1] += s[offset : offset + 1] | |
895 return _parse_plain, parts, offset + 1 | |
896 | |
897 def _parse_quote(parts, s, offset): | |
898 if offset < len(s) and s[offset : offset + 1] == b'"': # "" | |
899 parts.append(b'') | |
900 offset += 1 | |
901 while offset < len(s) and ( | |
902 s[offset : offset + 1].isspace() | |
903 or s[offset : offset + 1] == b',' | |
904 ): | |
905 offset += 1 | |
906 return _parse_plain, parts, offset | |
907 | |
908 while offset < len(s) and s[offset : offset + 1] != b'"': | |
909 if ( | |
910 s[offset : offset + 1] == b'\\' | |
911 and offset + 1 < len(s) | |
912 and s[offset + 1 : offset + 2] == b'"' | |
913 ): | |
914 offset += 1 | |
915 parts[-1] += b'"' | |
916 else: | |
917 parts[-1] += s[offset : offset + 1] | |
918 offset += 1 | |
919 | |
920 if offset >= len(s): | |
921 real_parts = _configlist(parts[-1]) | |
922 if not real_parts: | |
923 parts[-1] = b'"' | |
924 else: | |
925 real_parts[0] = b'"' + real_parts[0] | |
926 parts = parts[:-1] | |
927 parts.extend(real_parts) | |
928 return None, parts, offset | |
929 | |
930 offset += 1 | |
931 while offset < len(s) and s[offset : offset + 1] in [b' ', b',']: | |
932 offset += 1 | |
933 | |
934 if offset < len(s): | |
935 if offset + 1 == len(s) and s[offset : offset + 1] == b'"': | |
936 parts[-1] += b'"' | |
937 offset += 1 | |
938 else: | |
939 parts.append(b'') | |
940 else: | |
941 return None, parts, offset | |
942 | |
943 return _parse_plain, parts, offset | |
944 | |
945 def _configlist(s): | |
946 s = s.rstrip(b' ,') | |
947 if not s: | |
948 return [] | |
949 parser, parts, offset = _parse_plain, [b''], 0 | |
950 while parser: | |
951 parser, parts, offset = parser(parts, s, offset) | |
952 return parts | |
953 | |
954 if value is not None and isinstance(value, bytes): | |
955 result = _configlist(value.lstrip(b' ,\n')) | |
956 else: | |
957 result = value | |
958 return result or [] | |
959 | |
960 | |
871 def evalpythonliteral(s): | 961 def evalpythonliteral(s): |
872 """Evaluate a string containing a Python literal expression""" | 962 """Evaluate a string containing a Python literal expression""" |
873 # We could backport our tokenizer hack to rewrite '' to u'' if we want | 963 # We could backport our tokenizer hack to rewrite '' to u'' if we want |
874 if pycompat.ispy3: | 964 if pycompat.ispy3: |
875 return ast.literal_eval(s.decode('latin1')) | 965 return ast.literal_eval(s.decode('latin1')) |