Skip to content

Commit 9de391b

Browse files
isolovev authored and Qt Cherry-pick Bot committed
QXmlStreamReader: test adding data in different encodings
Currently all the tests are expected to pass because we always convert everything to UTF-8. This is a prerequisite for the follow-up patches that will try to optimize the internal logic to minimize the number of encoding conversions.

Task-number: QTBUG-124636
Pick-to: 6.5
Change-Id: I0ac9212aeb8ccc768393e80c9e0d704fdc227b56
Reviewed-by: Thiago Macieira <[email protected]>
(cherry picked from commit 900d1da)
Reviewed-by: Qt Cherry-pick Bot <[email protected]>
(cherry picked from commit f0dbb09)
1 parent f58657e commit 9de391b

File tree

1 file changed

+188
-0
lines changed

1 file changed

+188
-0
lines changed

tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <QTest>
1111
#include <QtTest/private/qcomparisontesthelper_p.h>
1212
#include <QUrl>
13+
#include <QVarLengthArray>
1314
#include <QXmlStreamReader>
1415
#include <QBuffer>
1516
#include <QStack>
@@ -573,6 +574,8 @@ private slots:
573574
void readLatin1Document() const;
574575
void appendToRawDocumentWithNonUtf8Encoding_data();
575576
void appendToRawDocumentWithNonUtf8Encoding();
577+
void appendDifferentEncodingsWithoutXmlProlog_data();
578+
void appendDifferentEncodingsWithoutXmlProlog();
576579
void readNextStartElement() const;
577580
void readElementText() const;
578581
void readElementText_data() const;
@@ -1328,6 +1331,191 @@ void tst_QXmlStream::appendToRawDocumentWithNonUtf8Encoding()
13281331
QCOMPARE(text, expectedNextElementText);
13291332
}
13301333

1334+
struct DataAndEncoding
1335+
{
1336+
enum Encoding : quint8 {
1337+
Raw = 0,
1338+
Latin1,
1339+
Utf8,
1340+
Utf16
1341+
};
1342+
1343+
QByteArray data;
1344+
Encoding encoding;
1345+
1346+
DataAndEncoding(const QByteArray &d, Encoding e)
1347+
: data(d), encoding(e)
1348+
{}
1349+
DataAndEncoding(const QString &str)
1350+
: data(asUtf16ByteArray(str)), encoding(Encoding::Utf16)
1351+
{}
1352+
1353+
static QByteArray asUtf16ByteArray(const QString &input)
1354+
{
1355+
return QByteArray{reinterpret_cast<const char *>(input.utf16()), input.size() * 2};
1356+
}
1357+
1358+
QAnyStringView toAnyStringView() const
1359+
{
1360+
switch (encoding) {
1361+
case Latin1:
1362+
return QLatin1StringView{data};
1363+
case Utf8:
1364+
return QUtf8StringView{data};
1365+
case Utf16:
1366+
Q_ASSERT(data.size() % 2 == 0);
1367+
return QStringView{reinterpret_cast<const char16_t *>(data.data()), data.size() / 2};
1368+
case Raw:
1369+
// Impossible to convert to QASV in general case
1370+
Q_UNREACHABLE_RETURN({});
1371+
}
1372+
}
1373+
1374+
// for next_permutation
1375+
friend bool operator<(const DataAndEncoding &lhs, const DataAndEncoding &rhs)
1376+
{
1377+
return lhs.encoding < rhs.encoding;
1378+
}
1379+
};
1380+
1381+
void tst_QXmlStream::appendDifferentEncodingsWithoutXmlProlog_data()
1382+
{
1383+
QTest::addColumn<QList<DataAndEncoding>>("inputs");
1384+
QTest::addColumn<QString>("expectedResult");
1385+
1386+
const QByteArray u8Str = "ΔΩΘ";
1387+
const QByteArray l1Str = "\xC4\xD6\xDC"; // ÄÖÜ
1388+
const QByteArray rawDataUtf8 = "\xf0\x9f\x98\x82"; // FACE WITH TEARS OF JOY (U+1F602)
1389+
const QString u16Str = u"\U0001F60E"_s; // SMILING FACE WITH SUNGLASSES (U+1F60E)
1390+
1391+
using Enc = DataAndEncoding::Encoding;
1392+
1393+
QVarLengthArray<DataAndEncoding> inputs{ DataAndEncoding{u8Str, Enc::Utf8},
1394+
DataAndEncoding{l1Str, Enc::Latin1},
1395+
DataAndEncoding{rawDataUtf8, Enc::Raw},
1396+
DataAndEncoding{u16Str} };
1397+
1398+
// Helper function to populate test data
1399+
auto encToName = [](Enc e) -> QByteArray {
1400+
switch (e) {
1401+
case Enc::Raw:
1402+
return "bytes"_ba;
1403+
case Enc::Latin1:
1404+
return "l1"_ba;
1405+
case Enc::Utf8:
1406+
return "u8"_ba;
1407+
case Enc::Utf16:
1408+
return "u16"_ba;
1409+
}
1410+
Q_UNREACHABLE_RETURN("");
1411+
};
1412+
auto adjustFirst = [](const DataAndEncoding &input) -> DataAndEncoding {
1413+
QByteArray newData = input.data;
1414+
if (input.encoding == Enc::Utf16)
1415+
newData.prepend(DataAndEncoding::asUtf16ByteArray(u"<a>"_s));
1416+
else
1417+
newData.prepend("<a>"_ba);
1418+
return {newData, input.encoding};
1419+
};
1420+
auto adjustLast = [](const DataAndEncoding &input) -> DataAndEncoding {
1421+
QByteArray newData = input.data;
1422+
if (input.encoding == Enc::Utf16)
1423+
newData.append(DataAndEncoding::asUtf16ByteArray(u"</a>"_s));
1424+
else
1425+
newData.append("</a>"_ba);
1426+
return {newData, input.encoding};
1427+
};
1428+
auto dataToString = [](const DataAndEncoding &input) -> QString {
1429+
if (input.encoding == Enc::Raw) {
1430+
// This function treats raw data as UTF-8
1431+
return QString::fromUtf8(input.data);
1432+
}
1433+
return input.toAnyStringView().toString();
1434+
};
1435+
// Iterate over all permutations of the list.
1436+
// Sort the list first, to cover all cases
1437+
std::sort(inputs.begin(), inputs.end());
1438+
do {
1439+
const auto lastIdx = inputs.size() - 1;
1440+
QByteArray testName;
1441+
QList<DataAndEncoding> inputData;
1442+
QString expectedResult;
1443+
for (qsizetype i = 0; i <= lastIdx; ++i) {
1444+
const auto &item = inputs[i];
1445+
testName += encToName(item.encoding);
1446+
if (i != lastIdx)
1447+
testName.append('+');
1448+
if (i == 0)
1449+
inputData.append(adjustFirst(item));
1450+
else if (i == lastIdx)
1451+
inputData.append(adjustLast(item));
1452+
else
1453+
inputData.append(item);
1454+
expectedResult.append(dataToString(item));
1455+
}
1456+
QTest::newRow(testName.constData()) << inputData << expectedResult;
1457+
} while (std::next_permutation(inputs.begin(), inputs.end()));
1458+
1459+
// plus add some corner cases
1460+
1461+
QTest::newRow("u8+bytes_FACE_WITH_TEARS_OF_JOY")
1462+
<< QList{ DataAndEncoding{"<a>\xf0\x9f"_ba, Enc::Utf8},
1463+
DataAndEncoding{"\x98\x82</a>"_ba, Enc::Raw} }
1464+
<< u"\U0001F602"_s;
1465+
1466+
// The test tries to read FACE IN CLOUDS emoji.
1467+
// Its full representation is:
1468+
// - FACE WITHOUT MOUTH: U+1F636 or \xf0\x9f\x98\xb6;
1469+
// - ZERO WIDTH JOINER: U+200D or \xe2\x80\x8d;
1470+
// - FOG: U+1F32B or \xf0\x9f\x8c\xab;
1471+
// - VARIATION SELECTOR-16: U+FE0F or \xef\xb8\x8f.
1472+
// This test tries to encode a part of it as UTF-8, and the rest as UTF-16.
1473+
// Important is that we need to break at the borders of the characters
1474+
QTest::newRow("u8+u16_FACE_IN_CLOUDS")
1475+
<< QList{ DataAndEncoding{"<a>\xf0\x9f\x98\xb6\xe2\x80\x8d"_ba, Enc::Utf8},
1476+
DataAndEncoding{u"\U0001F32B\uFE0F</a>"_s} }
1477+
<< u"\U0001F636\u200D\U0001F32B\uFE0F"_s;
1478+
}
1479+
1480+
void tst_QXmlStream::appendDifferentEncodingsWithoutXmlProlog()
1481+
{
1482+
QFETCH(const QList<DataAndEncoding>, inputs);
1483+
QFETCH(const QString, expectedResult);
1484+
1485+
{
1486+
QXmlStreamReader reader;
1487+
for (const auto &data : inputs) {
1488+
if (data.encoding == DataAndEncoding::Raw)
1489+
reader.addData(data.data);
1490+
else
1491+
reader.addData(data.toAnyStringView());
1492+
}
1493+
QVERIFY(reader.readNextStartElement());
1494+
const QString text = reader.readElementText();
1495+
QCOMPARE(text, expectedResult);
1496+
}
1497+
// same with c-tor
1498+
{
1499+
std::unique_ptr<QXmlStreamReader> reader = nullptr;
1500+
for (const auto &data : inputs) {
1501+
if (!reader) {
1502+
if (data.encoding == DataAndEncoding::Raw)
1503+
reader = std::make_unique<QXmlStreamReader>(data.data);
1504+
else
1505+
reader = std::make_unique<QXmlStreamReader>(data.toAnyStringView());
1506+
} else {
1507+
if (data.encoding == DataAndEncoding::Raw)
1508+
reader->addData(data.data);
1509+
else
1510+
reader->addData(data.toAnyStringView());
1511+
}
1512+
}
1513+
QVERIFY(reader->readNextStartElement());
1514+
const QString text = reader->readElementText();
1515+
QCOMPARE(text, expectedResult);
1516+
}
1517+
}
1518+
13311519
void tst_QXmlStream::readNextStartElement() const
13321520
{
13331521
QLatin1String in("<?xml version=\"1.0\"?><A><!-- blah --><B><C/></B><B attr=\"value\"/>text</A>");

0 commit comments

Comments
 (0)