e-convert.js: Missing line breaks with nested paragraphs
When the HTML structure had nested paragraphs (a DIV inside a DIV), the resulting plain-text version could be missing line breaks.
This commit is contained in:
@ -684,6 +684,24 @@ EvoConvert.ImgToText = function(img)
|
|||||||
return txt ? txt : "";
|
return txt ? txt : "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Appends the plain text produced for `node` to the text gathered so far.
 *
 * @param node the DOM node `text` was generated from; only the disabled code below looks at it
 * @param str the text accumulated before this node
 * @param text the text produced for `node`
 * @returns `str` immediately followed by `text`
 */
EvoConvert.appendNodeText = function(node, str, text)
{
	// Disabled for now: dropping the trailing space of `str` in front of a text
	// which begins with a new line breaks "-- <br>", and that case cannot be
	// distinguished from test 70 of /EWebView/ConvertToPlain.
	//
	// if (node && node.parentElement && text.startsWith('\n') && str.endsWith(" ")) {
	//	var whiteSpace = "normal";
	//
	//	if (node.parentElement)
	//		whiteSpace = window.getComputedStyle(node.parentElement).whiteSpace;
	//
	//	if (!whiteSpace || whiteSpace == "normal") {
	//		return str.substr(0, str.length - 1) + text;
	//	}
	// }

	var joined = str + text;

	return joined;
};
|
||||||
|
|
||||||
EvoConvert.extractElemText = function(elem, normalDivWidth, quoteLevel)
|
EvoConvert.extractElemText = function(elem, normalDivWidth, quoteLevel)
|
||||||
{
|
{
|
||||||
if (!elem)
|
if (!elem)
|
||||||
@ -700,7 +718,7 @@ EvoConvert.extractElemText = function(elem, normalDivWidth, quoteLevel)
|
|||||||
if (!node)
|
if (!node)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
str += EvoConvert.processNode(node, normalDivWidth, quoteLevel);
|
str = EvoConvert.appendNodeText(node, str, EvoConvert.processNode(node, normalDivWidth, quoteLevel));
|
||||||
}
|
}
|
||||||
|
|
||||||
return str;
|
return str;
|
||||||
@ -731,7 +749,7 @@ EvoConvert.mergeConsecutiveSpaces = function(str)
|
|||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
EvoConvert.RemoveInsignificantNewLines = function(node)
|
EvoConvert.RemoveInsignificantNewLines = function(node, stripSingleSpace)
|
||||||
{
|
{
|
||||||
var str = "";
|
var str = "";
|
||||||
|
|
||||||
@ -774,6 +792,11 @@ EvoConvert.RemoveInsignificantNewLines = function(node)
|
|||||||
}
|
}
|
||||||
|
|
||||||
str = EvoConvert.mergeConsecutiveSpaces(str.replace(/\t/g, " ").replace(/\r/g, " ").replace(/\n/g, " "));
|
str = EvoConvert.mergeConsecutiveSpaces(str.replace(/\t/g, " ").replace(/\r/g, " ").replace(/\n/g, " "));
|
||||||
|
|
||||||
|
if ((!whiteSpace || whiteSpace == "normal") && str == " " && (stripSingleSpace || (
|
||||||
|
!node.nextElementSibling || node.nextElementSibling.tagName == "DIV" || node.nextElementSibling.tagName == "P" || node.nextElementSibling.tagName == "PRE"))) {
|
||||||
|
str = "";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -876,6 +899,13 @@ EvoConvert.processNode = function(node, normalDivWidth, quoteLevel)
|
|||||||
}
|
}
|
||||||
|
|
||||||
str = EvoConvert.formatParagraph(EvoConvert.extractElemText(node, normalDivWidth, quoteLevel), ltr, align, indent, whiteSpace, width, extraIndent, liText, quoteLevel);
|
str = EvoConvert.formatParagraph(EvoConvert.extractElemText(node, normalDivWidth, quoteLevel), ltr, align, indent, whiteSpace, width, extraIndent, liText, quoteLevel);
|
||||||
|
|
||||||
|
if (!liText && node.parentElement && (node.parentElement.tagName == "DIV" || node.parentElement.tagName == "P") &&
|
||||||
|
style.display == "block" && str != "" && node.previousSibling &&
|
||||||
|
((node.previousSibling.nodeType == node.ELEMENT_NODE && node.previousSibling.tagName != "DIV" && node.previousSibling.tagName != "P" && node.previousSibling.tagName != "BR") ||
|
||||||
|
(node.previousSibling.nodeType == node.TEXT_NODE && EvoConvert.RemoveInsignificantNewLines(node.previousSibling, true) != ""))) {
|
||||||
|
str = "\n" + str;
|
||||||
|
}
|
||||||
} else if (node.tagName == "PRE") {
|
} else if (node.tagName == "PRE") {
|
||||||
str = EvoConvert.formatParagraph(EvoConvert.extractElemText(node, normalDivWidth, quoteLevel), ltr, align, indent, "pre", -1, 0, "", quoteLevel);
|
str = EvoConvert.formatParagraph(EvoConvert.extractElemText(node, normalDivWidth, quoteLevel), ltr, align, indent, "pre", -1, 0, "", quoteLevel);
|
||||||
} else if (node.tagName == "BR") {
|
} else if (node.tagName == "BR") {
|
||||||
@ -974,7 +1004,7 @@ EvoConvert.ToPlainText = function(element, normalDivWidth)
|
|||||||
if (!node)
|
if (!node)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
str += EvoConvert.processNode(node, normalDivWidth, 0);
|
str = EvoConvert.appendNodeText(node, str, EvoConvert.processNode(node, normalDivWidth, 0));
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
if (disconnectFromHead)
|
if (disconnectFromHead)
|
||||||
|
@ -2623,6 +2623,63 @@ test_convert_to_plain (TestFixture *fixture)
|
|||||||
"----------\n"
|
"----------\n"
|
||||||
"--------\n"
|
"--------\n"
|
||||||
"123\n",
|
"123\n",
|
||||||
|
10 },
|
||||||
|
/* 68 */{ HTML ("<div>123<div>456</div><div><br></div><div>7 8 9<b>b</b><div>abc</div>def<br><div>ghi</div></div></div>"),
|
||||||
|
"123\n"
|
||||||
|
"456\n"
|
||||||
|
"\n"
|
||||||
|
"7 8 9b\n"
|
||||||
|
"abc\n"
|
||||||
|
"def\n"
|
||||||
|
"ghi\n",
|
||||||
|
10 },
|
||||||
|
/* 69 */{ HTML ("<div>123<div>456</div><div><br></div><div><div>7 8 9<b>b</b></div><div>abc</div>def<br><div>ghi</div></div></div>"),
|
||||||
|
"123\n"
|
||||||
|
"456\n"
|
||||||
|
"\n"
|
||||||
|
"7 8 9b\n"
|
||||||
|
"abc\n"
|
||||||
|
"def\n"
|
||||||
|
"ghi\n",
|
||||||
|
10 },
|
||||||
|
/* 70 */{ HTML ("<div>123\n"
|
||||||
|
" <div>456</div>\n"
|
||||||
|
" <div><br></div>\n"
|
||||||
|
" <div>\n"
|
||||||
|
" <div>7 8 9<b>b</b></div>\n"
|
||||||
|
" <div>abc</div>\n"
|
||||||
|
" def<br>\n"
|
||||||
|
" <div>ghi</div>\n"
|
||||||
|
" </div>\n"
|
||||||
|
"</div>"),
|
||||||
|
"123 \n" /* The space should not be there, but see EvoConvert.appendNodeText() */
|
||||||
|
"456\n"
|
||||||
|
"\n"
|
||||||
|
"7 8 9b\n"
|
||||||
|
"abc\n"
|
||||||
|
"def\n"
|
||||||
|
"ghi\n",
|
||||||
|
10 },
|
||||||
|
/* 71 */{ HTML ("<div>aaa bbb,\n"
|
||||||
|
"<div><div><br></div>\n"
|
||||||
|
"<div>cc dd ee\n"
|
||||||
|
"</div>\n"
|
||||||
|
"<div><br></div>\n"
|
||||||
|
"<div>ff,<b>gg</b></div>\n"
|
||||||
|
"<div>-- <br>\n"
|
||||||
|
" <div>\n"
|
||||||
|
" <div>hh ii<div>jj kk</div>\n"
|
||||||
|
" </div>\n"
|
||||||
|
"</div>\n"
|
||||||
|
"</div></div></div>\n"),
|
||||||
|
"aaa bbb,\n"
|
||||||
|
"\n"
|
||||||
|
"cc dd ee\n"
|
||||||
|
"\n"
|
||||||
|
"ff,gg\n"
|
||||||
|
"-- \n"
|
||||||
|
"hh ii\n"
|
||||||
|
"jj kk\n",
|
||||||
10 }
|
10 }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user