# HG changeset patch
# User Keshav Kini <kesha...@gmail.com>
# Date 1328230521 -28800
# Node ID f87f504dad1d2eaa9678d5d548572e6bbd4fd8e8
# Parent 1189e52ba27c468788de4ed2cd93eef1548ec413
Remove illegal characters from username/email
diff --git a/hggit/git_handler.py b/hggit/git_handler.py
--- a/hggit/git_handler.py
+++ b/hggit/git_handler.py
@@ -352,7 +352,7 @@
return commit.id
def get_valid_git_username_email(self, name):
- return name.lstrip('< ').rstrip('> ')
+ return re.sub('[<>\n]', '?', name.lstrip('< ').rstrip('> '))
def get_git_author(self, ctx):
# hg authors might not have emails
@@ -363,8 +363,8 @@
a = regex.match(author)
if a:
- name = a.group(1)
- email = a.group(2)
+ name = self.get_valid_git_username_email(a.group(1))
+ email = self.get_valid_git_username_email(a.group(2))
if a.group(3) != None and len(a.group(3)) != 0:
name += ' ext:(' + urllib.quote(a.group(3)) + ')'
author = self.get_valid_git_username_email(name) + ' <' + self.get_valid_git_username_email(email) + '>'
> hggit/git_handler.py | 6 +++---
> 1 files changed, 3 insertions(+), 3 deletions(-)
>
>
> # HG changeset patch
> # User Keshav Kini <kesha...@gmail.com>
> # Date 1328230521 -28800
> # Node ID f87f504dad1d2eaa9678d5d548572e6bbd4fd8e8
> # Parent 1189e52ba27c468788de4ed2cd93eef1548ec413
> Remove illegal characters from username/email
>
> diff --git a/hggit/git_handler.py b/hggit/git_handler.py
> --- a/hggit/git_handler.py
> +++ b/hggit/git_handler.py
> @@ -352,7 +352,7 @@
> return commit.id
>
> def get_valid_git_username_email(self, name):
> - return name.lstrip('< ').rstrip('> ')
> + return re.sub('[<>\n]', '?', name.lstrip('< ').rstrip('> '))
This is now sufficiently magical that I need it to have tests, or I wont take it. I'm too paranoid about breaking this in mysterious ways now.
>
> def get_git_author(self, ctx):
> # hg authors might not have emails
> @@ -363,8 +363,8 @@
> a = regex.match(author)
>
> if a:
> - name = a.group(1)
> - email = a.group(2)
> + name = self.get_valid_git_username_email(a.group(1))
> + email = self.get_valid_git_username_email(a.group(2))
> if a.group(3) != None and len(a.group(3)) != 0:
> name += ' ext:(' + urllib.quote(a.group(3)) + ')'
> author = self.get_valid_git_username_email(name) + ' <' + self.get_valid_git_username_email(email) + '>'
>
> --
> You received this message because you are subscribed to the Google Groups "hg-git" group.
> To post to this group, send email to hg-...@googlegroups.com.
> To unsubscribe from this group, send email to hg-git+un...@googlegroups.com.
> For more options, visit this group at http://groups.google.com/group/hg-git?hl=en.
>
This description should *definitely* be part of the docstring. I'm
fully ignorant of git's absurd formatting requirements on its username
field that it only erratically enforces.
Look in the tests/ directory. Emulate something in there, or add to
it. Docstring tests would also be helpful, and in this case might be
sufficient.
Thanks!
>
> (Note to others reading this: I am in part also responding to a comment
> posted on https://bitbucket.org/durin42/hg-git/pull-request/4/ .)
>
> The links: http://hg.sagemath.org/sage-main/rev/8dc790969cfe and
> https://github.com/sagemath/sagelib/commit/a7e723ac06ac6b7247432f50ce2ea44791daa7ac
> . Those links just point to the problematic commit, but the rest of the
> repository is also accessible from there.
While the real-world examples are appreciated, I lack the time and
energy to manually verify things like this in the future. An automated
test is the only real option.
>
> Thanks,
> Keshav
>
> --
> You received this message because you are subscribed to the Google Groups
> "hg-git" group.
> To view this discussion on the web visit
> https://groups.google.com/d/msg/hg-git/-/xYXxDPmzRSAJ.
diff --git a/hggit/git_handler.py b/hggit/git_handler.py
--- a/hggit/git_handler.py
+++ b/hggit/git_handler.py
@@ -352,6 +352,50 @@
return commit.id
def get_valid_git_username_email(self, name):
+ r"""
+ Sanitize usernames and emails to fit git's restrictions.
+
+ The following is taken from the man page of git's fast-import
+ command:
+
+ [...] Likewise LF means one (and only one) linefeed [...]
+
+ committer
+ The committer command indicates who made this commit,
+ and when they made it.
+
+ Here <name> is the person's display name (for example
+ "Com M Itter") and <email> is the person's email address
+ ("c...@example.com[1]"). LT and GT are the literal
+ less-than (\x3c) and greater-than (\x3e) symbols. These
+ are required to delimit the email address from the other
+ fields in the line. Note that <name> and <email> are
+ free-form and may contain any sequence of bytes, except
+ LT, GT and LF. <name> is typically UTF-8 encoded.
+
+ Accordingly, this function makes sure that there are none of the
+ characters <, >, or \n in any string which will be used for
+ a git username or email. Before this, it first removes left
+ angle brackets and spaces from the beginning, and right angle
+ brackets and spaces from the end, of this string, to convert
+ such things as " <jo...@doe.com> " to "jo...@doe.com" for
+ convenience.
+
+ TESTS::
+
+ >>> from mercurial.ui import ui
+ >>> g = GitHandler('', ui()).get_valid_git_username_email
+ >>> g('John Doe')
+ 'John Doe'
+ >>> g('jo...@doe.com')
+ 'jo...@doe.com'
+ >>> g(' <jo...@doe.com> ')
+ 'jo...@doe.com'
+ >>> g(' <random<\n<garbage\n> > > ')
+ 'random???garbage?'
+ >>> g('Typo in hgrc >but.h...@handles.it.gracefully>')
+ 'Typo in hgrc ?but.h...@handles.it.gracefully'
+ """
return re.sub('[<>\n]', '?', name.lstrip('< ').rstrip('> '))
def get_git_author(self, ctx):
'make tests' in the project root.
>
> Thanks,
> Keshav
>
> --
> You received this message because you are subscribed to the Google Groups
> "hg-git" group.
> To view this discussion on the web visit
> https://groups.google.com/d/msg/hg-git/-/wiKRj5bIzLsJ.
# HG changeset patch
# User Keshav Kini <kesha...@gmail.com>
# Date 1328796642 -28800
# Node ID ec790587cb1a04cc84554d10dd12157abd72004f
# Parent 405df1102aa6acdb966d0cfc1d67334ddf401c20
Add a test to tests/test-hg-author for handling illegal characters
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
help:
@echo 'Commonly used make targets:'
@echo ' tests - run all tests in the automatic test suite'
- @echo ' all-version-tests - run all tests against many hg versions'
+ @echo ' all-version-tests - run all tests against many hg versions'
@echo ' tests-%s - run all tests in the specified hg version'
all: help
diff --git a/tests/test-hg-author b/tests/test-hg-author
--- a/tests/test-hg-author
+++ b/tests/test-hg-author
@@ -99,6 +99,11 @@
hgcommit -u "test < te...@example.com >" -m 'add eta'
hg push
+echo theta > theta
+hg add theta
+hgcommit -u "test >te...@example.com>" -m 'add theta'
+hg push
+
hg log --graph | egrep -v ': *(not-master|master)'
cd ..
diff --git a/tests/test-hg-author.out b/tests/test-hg-author.out
--- a/tests/test-hg-author.out
+++ b/tests/test-hg-author.out
@@ -39,9 +39,19 @@
creating and sending data
default::refs/heads/not-master => GIT:7eeab2ea
default::refs/heads/master => GIT:8c878c97
-@ changeset: 7:b90e988091a2
+pushing to git://localhost/gitrepo
+exporting hg objects to git
+creating and sending data
+ default::refs/heads/not-master => GIT:7eeab2ea
+ default::refs/heads/master => GIT:1e03e913
+@ changeset: 8:d3c51ce68cfd
| tag: default/master
| tag: tip
+| user: test >te...@example.com>
+| date: Mon Jan 01 00:00:18 2007 +0000
+| summary: add theta
+|
+o changeset: 7:b90e988091a2
| user: test < te...@example.com >
| date: Mon Jan 01 00:00:17 2007 +0000
| summary: add eta
@@ -83,10 +93,15 @@
summary: add alpha
importing git objects into hg
-7 files updated, 0 files merged, 0 files removed, 0 files unresolved
-@ changeset: 7:8ab87d5066e4
+8 files updated, 0 files merged, 0 files removed, 0 files unresolved
+@ changeset: 8:efec0270e295
| tag: default/master
| tag: tip
+| user: test ?te...@example.com <test ?te...@example.com>
+| date: Mon Jan 01 00:00:18 2007 +0000
+| summary: add theta
+|
+o changeset: 7:8ab87d5066e4
| user: test <te...@example.com>
| date: Mon Jan 01 00:00:17 2007 +0000
| summary: add eta
@@ -127,6 +142,12 @@
date: Mon Jan 01 00:00:10 2007 +0000
summary: add alpha
+commit 1e03e913eca571b86ee06d3c1ddd795dde9ca917
+Author: test ?te...@example.com <test ?te...@example.com>
+Date: Mon Jan 1 00:00:18 2007 +0000
+
+ add theta
+
commit 8c878c9764e96e67ed9f62b3f317d156bf71bc52
Author: test <te...@example.com>
Date: Mon Jan 1 00:00:17 2007 +0000
On Feb 8, 2012, at 11:20 AM, Keshav Kini wrote:
> # HG changeset patch
> # User Keshav Kini <kesha...@gmail.com>
> # Date 1328721209 -28800
> # Node ID 405df1102aa6acdb966d0cfc1d67334ddf401c20
> # Parent f87f504dad1d2eaa9678d5d548572e6bbd4fd8e8
> Add docstring, doctests for previous commit
>
> diff --git a/hggit/git_handler.py b/hggit/git_handler.py
> --- a/hggit/git_handler.py
> +++ b/hggit/git_handler.py
> @@ -352,6 +352,50 @@
> return commit.id
>
> def get_valid_git_username_email(self, name):
> + r"""
> + Sanitize usernames and emails to fit git's restrictions.
This should be on the first line of the docstring with the """.
Does doctest run these correctly? I'm just not sure if it'll run the indented tests.
> + """
> return re.sub('[<>\n]', '?', name.lstrip('< ').rstrip('> '))
>
> def get_git_author(self, ctx):
>
> --
> You received this message because you are subscribed to the Google Groups "hg-git" group.
> 1) Did you try to apply it over my previous patch? Observe that the Node-ID
> and Parent headers of my patches match each other and your tip, so unless
> they were corrupted in transit or the patchbomb extension is buggy, I can't
> see how they wouldn't apply. Does it apply with `hg import --exact`?
No, I assumed that this would supercede any previous patch series. Please send the entire series, I've queued nothing.
>
> 2) OK. http://www.python.org/dev/peps/pep-0257/#multi-line-docstrings says
> "The summary line may be on the same line as the opening quotes or on the
> next line". Of the multiline docstrings in the hg-git codebase, which I can
> apparently count on my thumbs, one has its summary line on the same line as
> the opening quotes and the other does not, so this choice of convention is
> not exactly self-evident. But OK.
>
> 3) To paraphrase
> http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#doctest-blocks
> , yes, it does. To quote
> http://docs.python.org/library/doctest.html#how-are-docstring-examples-recognized
> , "the starting column doesn't matter". You could also run a doctest with
> `python -m doctest -v <filename>` and check this yourself, if the patch had
> applied correctly, though I don't understand why it doesn't.
>
> 4) OK. I asked you earlier whether you would like me to fold my patches
> together, but you didn't respond, so I assumed not, and published my
> commits on my bitbucket fork, so folding them now would be rewriting public
> history (which I have even asked ~2 people to clone). But OK.
Sorry, I totally missed that. Your history isn't canonical here, so I'm (frankly) not worried about mutating the history. Phases will make this better.
>
> A single patch is forthcoming.
>
> -Keshav
>
> --
> You received this message because you are subscribed to the Google Groups "hg-git" group.
> To view this discussion on the web visit https://groups.google.com/d/msg/hg-git/-/1Rb1kbI6pu0J.
# HG changeset patch
# User Keshav Kini <kesha...@gmail.com>
# Date 1329269406 -28800
# Node ID d027babd300b0cc97a18d635c867f761d2e6a35d
# Parent 1189e52ba27c468788de4ed2cd93eef1548ec413
Remove illegal characters from username/email
* * *
Add docstring, doctests for previous commit
* * *
Add a test to tests/test-hg-author for handling illegal characters
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
help:
@echo 'Commonly used make targets:'
@echo ' tests - run all tests in the automatic test suite'
- @echo ' all-version-tests - run all tests against many hg versions'
+ @echo ' all-version-tests - run all tests against many hg versions'
@echo ' tests-%s - run all tests in the specified hg version'
all: help
diff --git a/hggit/git_handler.py b/hggit/git_handler.py
--- a/hggit/git_handler.py
+++ b/hggit/git_handler.py
@@ -352,7 +352,50 @@
return commit.id
def get_valid_git_username_email(self, name):
- return name.lstrip('< ').rstrip('> ')
+ r"""Sanitize usernames and emails to fit git's restrictions.
+ """
+ return re.sub('[<>\n]', '?', name.lstrip('< ').rstrip('> '))
def get_git_author(self, ctx):
# hg authors might not have emails
@@ -363,8 +406,8 @@
a = regex.match(author)
if a:
- name = a.group(1)
- email = a.group(2)
+ name = self.get_valid_git_username_email(a.group(1))
+ email = self.get_valid_git_username_email(a.group(2))
if a.group(3) != None and len(a.group(3)) != 0:
name += ' ext:(' + urllib.quote(a.group(3)) + ')'
author = self.get_valid_git_username_email(name) + ' <' + self.get_valid_git_username_email(email) + '>'
> This patch should be applied to the current tip
> (1189e52ba27c468788de4ed2cd93eef1548ec413). I also deindented the doctest
> since that seems to be what you prefer.
Thanks, will look tomorrow.
> For some reason I am unable to run
> `make tests` properly since I folded the patches together - the tester
> skips 22 of the 24 tests. I didn't change any actual code since the last
> time I ran the tests, though, so that doesn't really worry me.
You probably leaked a git-daemon somewhere. Try 'killall git-daemon' and see if things recover.
> I'm running Mercurial 2.1 so I already have phases, but I'm not sure how
> you think they will help this kind of situation. They did present one more
> obstacle when I was trying to qimport the commits I had already pushed to
> bitbucket :)
In theory, you should have pushed to bitbucket using the 'draft' phase, which is public but mutable. In that case, this wouldn't be (as much of) an issue. Future plans for history mutation include a way to handle exactly this situation gracefully.
>
> -Keshav
>
> --
> You received this message because you are subscribed to the Google Groups "hg-git" group.
> To view this discussion on the web visit https://groups.google.com/d/msg/hg-git/-/URfNlJJdvJMJ.
> --
> You received this message because you are subscribed to the Google Groups "hg-git" group.