cgit

commit 0a799424f682071da9f5b632d1394308e9255bb5

Author: Julius Plenz <plenz@cis.fu-berlin.de>

fix two encoding bugs

reencode() takes three arguments in the order (txt, from, to), as opposed to
reencode_string(), which, like iconv, takes its arguments with from
and to swapped. Fix that (this makes reencode() more intuitive).
If the src and dst encodings are equivalent, don't do any re-encoding.

If no special encoding parameter is found within the commit, assume
UTF-8 and explicitly convert to PAGE_ENCODING. The change to reencode()
mentioned above avoids re-encoding a UTF-8 string to UTF-8, for example.

Signed-off-by: Julius Plenz <plenz@cis.fu-berlin.de>
Signed-off-by: Lars Hjemli <hjemli@gmail.com>

 parsing.c | 24 +++++++++++++++---------


diff --git a/parsing.c b/parsing.c
index f37c49d261765636cfef652faec3be8baf6d2894..c9e43509242d1d3a4e6faf85da7ce46375659ab3 100644
--- a/parsing.c
+++ b/parsing.c
@@ -106,7 +106,11 @@
 	if (!txt || !*txt || !src_enc || !dst_enc)
 		return *txt;
 
-	tmp = reencode_string(*txt, src_enc, dst_enc);
+	/* no encoding needed if src_enc equals dst_enc */
+	if(!strcasecmp(src_enc, dst_enc))
+		return *txt;
+
+	tmp = reencode_string(*txt, dst_enc, src_enc);
 	if (tmp) {
 		free(*txt);
 		*txt = tmp;
@@ -160,6 +164,10 @@ 			p = t + 1;
 		}
 	}
 
+	/* if no special encoding is found, assume UTF-8 */
+	if(!ret->msg_encoding)
+		ret->msg_encoding = xstrdup("UTF-8");
+
 	// skip unknown header fields
 	while (p && *p && (*p != '\n')) {
 		p = strchr(p, '\n');
@@ -189,14 +197,12 @@ 			ret->msg = xstrdup(p);
 	} else
 		ret->subject = xstrdup(p);
 
-	if (ret->msg_encoding) {
-		reencode(&ret->author, PAGE_ENCODING, ret->msg_encoding);
-		reencode(&ret->author_email, PAGE_ENCODING, ret->msg_encoding);
-		reencode(&ret->committer, PAGE_ENCODING, ret->msg_encoding);
-		reencode(&ret->committer_email, PAGE_ENCODING, ret->msg_encoding);
-		reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding);
-		reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding);
-	}
+	reencode(&ret->author, ret->msg_encoding, PAGE_ENCODING);
+	reencode(&ret->author_email, ret->msg_encoding, PAGE_ENCODING);
+	reencode(&ret->committer, ret->msg_encoding, PAGE_ENCODING);
+	reencode(&ret->committer_email, ret->msg_encoding, PAGE_ENCODING);
+	reencode(&ret->subject, ret->msg_encoding, PAGE_ENCODING);
+	reencode(&ret->msg, ret->msg_encoding, PAGE_ENCODING);
 
 	return ret;
 }