e-convert.js: Missing line breaks with nested paragraphs
When the HTML structure contained nested paragraphs (a DIV inside another DIV), the resulting plain text version could be missing line breaks.
This commit is contained in:
@ -684,6 +684,24 @@ EvoConvert.ImgToText = function(img)
|
||||
return txt ? txt : "";
|
||||
}
|
||||
|
||||
EvoConvert.appendNodeText = function(node, str, text)
{
	/* Appends `text`, the plain text generated for `node`, to `str`, the plain
	 * text accumulated so far, and returns the result.
	 *
	 * `node` is currently unused. It was meant for the logic below, which drops
	 * the trailing space of `str` when `text` begins with a new line and the
	 * parent element of the node uses the "normal" white-space style:
	 *
	 *	if (node && node.parentElement && text.startsWith('\n') && str.endsWith(" ")) {
	 *		var whiteSpace = "normal";
	 *
	 *		if (node.parentElement)
	 *			whiteSpace = window.getComputedStyle(node.parentElement).whiteSpace;
	 *
	 *		if (!whiteSpace || whiteSpace == "normal") {
	 *			return str.substr(0, str.length - 1) + text;
	 *		}
	 *	}
	 *
	 * It is disabled for now, because it breaks "-- <br>" and that case cannot be
	 * distinguished from test 70 of /EWebView/ConvertToPlain.
	 */
	var joined = str + text;

	return joined;
}
|
||||
|
||||
EvoConvert.extractElemText = function(elem, normalDivWidth, quoteLevel)
|
||||
{
|
||||
if (!elem)
|
||||
@ -700,7 +718,7 @@ EvoConvert.extractElemText = function(elem, normalDivWidth, quoteLevel)
|
||||
if (!node)
|
||||
continue;
|
||||
|
||||
str += EvoConvert.processNode(node, normalDivWidth, quoteLevel);
|
||||
str = EvoConvert.appendNodeText(node, str, EvoConvert.processNode(node, normalDivWidth, quoteLevel));
|
||||
}
|
||||
|
||||
return str;
|
||||
@ -731,7 +749,7 @@ EvoConvert.mergeConsecutiveSpaces = function(str)
|
||||
return str;
|
||||
}
|
||||
|
||||
EvoConvert.RemoveInsignificantNewLines = function(node)
|
||||
EvoConvert.RemoveInsignificantNewLines = function(node, stripSingleSpace)
|
||||
{
|
||||
var str = "";
|
||||
|
||||
@ -774,6 +792,11 @@ EvoConvert.RemoveInsignificantNewLines = function(node)
|
||||
}
|
||||
|
||||
str = EvoConvert.mergeConsecutiveSpaces(str.replace(/\t/g, " ").replace(/\r/g, " ").replace(/\n/g, " "));
|
||||
|
||||
if ((!whiteSpace || whiteSpace == "normal") && str == " " && (stripSingleSpace || (
|
||||
!node.nextElementSibling || node.nextElementSibling.tagName == "DIV" || node.nextElementSibling.tagName == "P" || node.nextElementSibling.tagName == "PRE"))) {
|
||||
str = "";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -876,6 +899,13 @@ EvoConvert.processNode = function(node, normalDivWidth, quoteLevel)
|
||||
}
|
||||
|
||||
str = EvoConvert.formatParagraph(EvoConvert.extractElemText(node, normalDivWidth, quoteLevel), ltr, align, indent, whiteSpace, width, extraIndent, liText, quoteLevel);
|
||||
|
||||
if (!liText && node.parentElement && (node.parentElement.tagName == "DIV" || node.parentElement.tagName == "P") &&
|
||||
style.display == "block" && str != "" && node.previousSibling &&
|
||||
((node.previousSibling.nodeType == node.ELEMENT_NODE && node.previousSibling.tagName != "DIV" && node.previousSibling.tagName != "P" && node.previousSibling.tagName != "BR") ||
|
||||
(node.previousSibling.nodeType == node.TEXT_NODE && EvoConvert.RemoveInsignificantNewLines(node.previousSibling, true) != ""))) {
|
||||
str = "\n" + str;
|
||||
}
|
||||
} else if (node.tagName == "PRE") {
|
||||
str = EvoConvert.formatParagraph(EvoConvert.extractElemText(node, normalDivWidth, quoteLevel), ltr, align, indent, "pre", -1, 0, "", quoteLevel);
|
||||
} else if (node.tagName == "BR") {
|
||||
@ -974,7 +1004,7 @@ EvoConvert.ToPlainText = function(element, normalDivWidth)
|
||||
if (!node)
|
||||
continue;
|
||||
|
||||
str += EvoConvert.processNode(node, normalDivWidth, 0);
|
||||
str = EvoConvert.appendNodeText(node, str, EvoConvert.processNode(node, normalDivWidth, 0));
|
||||
}
|
||||
} finally {
|
||||
if (disconnectFromHead)
|
||||
|
@ -2623,6 +2623,63 @@ test_convert_to_plain (TestFixture *fixture)
|
||||
"----------\n"
|
||||
"--------\n"
|
||||
"123\n",
|
||||
10 },
|
||||
/* 68 */{ HTML ("<div>123<div>456</div><div><br></div><div>7 8 9<b>b</b><div>abc</div>def<br><div>ghi</div></div></div>"),
|
||||
"123\n"
|
||||
"456\n"
|
||||
"\n"
|
||||
"7 8 9b\n"
|
||||
"abc\n"
|
||||
"def\n"
|
||||
"ghi\n",
|
||||
10 },
|
||||
/* 69 */{ HTML ("<div>123<div>456</div><div><br></div><div><div>7 8 9<b>b</b></div><div>abc</div>def<br><div>ghi</div></div></div>"),
|
||||
"123\n"
|
||||
"456\n"
|
||||
"\n"
|
||||
"7 8 9b\n"
|
||||
"abc\n"
|
||||
"def\n"
|
||||
"ghi\n",
|
||||
10 },
|
||||
/* 70 */{ HTML ("<div>123\n"
|
||||
" <div>456</div>\n"
|
||||
" <div><br></div>\n"
|
||||
" <div>\n"
|
||||
" <div>7 8 9<b>b</b></div>\n"
|
||||
" <div>abc</div>\n"
|
||||
" def<br>\n"
|
||||
" <div>ghi</div>\n"
|
||||
" </div>\n"
|
||||
"</div>"),
|
||||
"123 \n" /* The space should not be there, but see EvoConvert.appendNodeText() */
|
||||
"456\n"
|
||||
"\n"
|
||||
"7 8 9b\n"
|
||||
"abc\n"
|
||||
"def\n"
|
||||
"ghi\n",
|
||||
10 },
|
||||
/* 71 */{ HTML ("<div>aaa bbb,\n"
|
||||
"<div><div><br></div>\n"
|
||||
"<div>cc dd ee\n"
|
||||
"</div>\n"
|
||||
"<div><br></div>\n"
|
||||
"<div>ff,<b>gg</b></div>\n"
|
||||
"<div>-- <br>\n"
|
||||
" <div>\n"
|
||||
" <div>hh ii<div>jj kk</div>\n"
|
||||
" </div>\n"
|
||||
"</div>\n"
|
||||
"</div></div></div>\n"),
|
||||
"aaa bbb,\n"
|
||||
"\n"
|
||||
"cc dd ee\n"
|
||||
"\n"
|
||||
"ff,gg\n"
|
||||
"-- \n"
|
||||
"hh ii\n"
|
||||
"jj kk\n",
|
||||
10 }
|
||||
};
|
||||
|
||||
|
Reference in New Issue
Block a user