diff hgext3rd/topic/__init__.py @ 6206:2ec9c87e8574

topic: allow unicode symbols in names as long as they are alphanumeric. I decided to relax this logic just a little bit to allow unicode "word characters" (i.e. everything that a unicode regex \w matches). This is still stricter than core, which is more permissive for branches and bookmarks: core only forbids certain byte values (like the null byte, see the scmutil.checknewlabel function). This extra check for topic names could be dropped altogether and we could rely solely on checknewlabel(), but I don't know whether there is some corner case that topics can't handle. Needs more investigation (and tests).
author Anton Shestakov <av6@dwimlabs.net>
date Sat, 19 Mar 2022 19:13:00 +0300
parents 9d81041f735f
children 7ad8107d953a
line wrap: on
line diff
--- a/hgext3rd/topic/__init__.py	Sun Mar 13 19:42:10 2022 +0300
+++ b/hgext3rd/topic/__init__.py	Sat Mar 19 19:13:00 2022 +0300
@@ -169,6 +169,7 @@
     cmdutil,
     commands,
     context,
+    encoding,
     error,
     exchange,
     extensions,
@@ -827,10 +828,16 @@
         # Have some restrictions on the topic name just like bookmark name
         scmutil.checknewlabel(repo, topic, b'topic')
 
-        rmatch = re.match(br'[-_.\w]+', topic)
-        if not rmatch or rmatch.group(0) != topic:
-            helptxt = _(b"topic names can only consist of alphanumeric, '-'"
-                        b" '_' and '.' characters")
+        helptxt = _(b"topic names can only consist of alphanumeric, '-'"
+                    b" '_' and '.' characters")
+        try:
+            utopic = encoding.unifromlocal(topic)
+        except error.Abort:
+            # Maybe we should allow these topic names as well, as long as they
+            # don't break any other rules
+            utopic = ''
+        rmatch = re.match(r'[-_.\w]+', utopic, re.UNICODE)
+        if not utopic or not rmatch or rmatch.group(0) != utopic:
             raise error.Abort(_(b"invalid topic name: '%s'") % topic, hint=helptxt)
 
     if list: