[PATCH v2 0/5] Fix and extend encoding handling in fast export/import
- Date: Tue, 30 Apr 2019 11:25:18 -0700
- From: Elijah Newren <newren@xxxxxxxxx>
- Subject: [PATCH v2 0/5] Fix and extend encoding handling in fast export/import
While stress testing `git filter-repo`, I noticed an issue with
encoding; further digging led to the fixes and features in this series.
See the individual commit messages for details.
Changes since v1 (full range-diff below):
* Applied style fixes Eric pointed out in his review (thanks!)
* Rebased on latest master (83232e38, "The seventh batch"), resolving
  a trivial merge conflict. Now merges cleanly with next and pu as
  well.
I'm a bit under the weather so I may be slow to respond...
Elijah Newren (5):
t9350: fix encoding test to actually test reencoding
fast-import: support 'encoding' commit header
fast-export: avoid stripping encoding header if we cannot reencode
fast-export: differentiate between explicitly utf-8 and implicitly utf-8
fast-export: do automatic reencoding of commit messages only if requested
Documentation/git-fast-import.txt | 7 ++++
builtin/fast-export.c | 44 +++++++++++++++++++++----
fast-import.c | 11 +++++--
t/t9300-fast-import.sh | 20 ++++++++++++
t/t9350-fast-export.sh | 53 +++++++++++++++++++++++++------
5 files changed, 118 insertions(+), 17 deletions(-)
Range-diff:
1: d6efd05142 ! 1: 9cc04242bd t9350: fix encoding test to actually test reencoding
@@ -26,8 +26,7 @@
- # use author and committer name in ISO-8859-1 to match it.
- . "$TEST_DIRECTORY"/t3901/8859-1.txt &&
+ test_when_finished "git reset --hard HEAD~1" &&
-+ test_when_finished "git config --unset i18n.commitencoding" &&
-+ git config i18n.commitencoding iso-8859-7 &&
++ test_config i18n.commitencoding iso-8859-7 &&
test_tick &&
echo rosten >file &&
- git commit -s -m den file &&
2: 02f48c7559 ! 2: 0cd023ac7a fast-import: support 'encoding' commit header
@@ -51,9 +51,8 @@
}
if (!committer)
die("Expected committer but didn't get one");
-+ if (skip_prefix(command_buf.buf, "encoding ", &encoding)) {
++ if (skip_prefix(command_buf.buf, "encoding ", &encoding))
+ read_next_command();
-+ }
parse_data(&msg, 0, NULL);
read_next_command();
parse_from(b);
@@ -69,7 +68,7 @@
+ strbuf_addf(&new_data,
+ "encoding %s\n",
+ encoding);
-+ strbuf_addf(&new_data, "\n");
++ strbuf_addch(&new_data, '\n');
strbuf_addbuf(&new_data, &msg);
free(author);
free(committer);
@@ -78,14 +77,14 @@
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@
- background_import_still_running
+ sed -e s/LFs/LLL/ W-input | tr L "\n" | test_must_fail git fast-import
'
+###
-+### series W (other new features)
++### series X (other new features)
+###
+
-+test_expect_success 'W: handling encoding' '
++test_expect_success 'X: handling encoding' '
+ test_tick &&
+ cat >input <<-INPUT_END &&
+ commit refs/heads/encoding
3: 86c348402d ! 3: 1fddf51402 fast-export: avoid stripping encoding header if we cannot reencode
@@ -41,8 +41,7 @@
+test_expect_success 'encoding preserved if reencoding fails' '
+
+ test_when_finished "git reset --hard HEAD~1" &&
-+ test_when_finished "git config --unset i18n.commitencoding" &&
-+ git config i18n.commitencoding iso-8859-7 &&
++ test_config i18n.commitencoding iso-8859-7 &&
+ echo rosten >file &&
+ git commit -s -m "$(printf "Pi: \360; Invalid: \377")" file &&
+ git fast-export wer^..wer >iso-8859-7.fi &&
4: c09b23bc59 = 4: 4a2e04b3ae fast-export: differentiate between explicitly utf-8 and implicitly utf-8
5: 24b69a0db9 ! 5: 44aacb1a0b fast-export: do automatic reencoding of commit messages only if requested
@@ -92,8 +92,7 @@
+test_expect_success 'reencoding iso-8859-7' '
test_when_finished "git reset --hard HEAD~1" &&
- test_when_finished "git config --unset i18n.commitencoding" &&
-@@
+ test_config i18n.commitencoding iso-8859-7 &&
test_tick &&
echo rosten >file &&
git commit -s -m "$(printf "Pi: \360")" file &&
@@ -109,8 +108,7 @@
+test_expect_success 'aborting on iso-8859-7' '
+
+ test_when_finished "git reset --hard HEAD~1" &&
-+ test_when_finished "git config --unset i18n.commitencoding" &&
-+ git config i18n.commitencoding iso-8859-7 &&
++ test_config i18n.commitencoding iso-8859-7 &&
+ echo rosten >file &&
+ git commit -s -m "$(printf "Pi: \360")" file &&
+ test_must_fail git fast-export --reencode=abort wer^..wer >iso-8859-7.fi
@@ -119,8 +117,7 @@
+test_expect_success 'preserving iso-8859-7' '
+
+ test_when_finished "git reset --hard HEAD~1" &&
-+ test_when_finished "git config --unset i18n.commitencoding" &&
-+ git config i18n.commitencoding iso-8859-7 &&
++ test_config i18n.commitencoding iso-8859-7 &&
+ echo rosten >file &&
+ git commit -s -m "$(printf "Pi: \360")" file &&
+ git fast-export --reencode=no wer^..wer >iso-8859-7.fi &&
@@ -134,8 +131,7 @@
test_expect_success 'encoding preserved if reencoding fails' '
test_when_finished "git reset --hard HEAD~1" &&
-@@
- git config i18n.commitencoding iso-8859-7 &&
+ test_config i18n.commitencoding iso-8859-7 &&
echo rosten >file &&
git commit -s -m "$(printf "Pi: \360; Invalid: \377")" file &&
- git fast-export wer^..wer >iso-8859-7.fi &&
--
2.21.0.782.g44aacb1a0b