|
10 | 10 | #include <QTest> |
11 | 11 | #include <QtTest/private/qcomparisontesthelper_p.h> |
12 | 12 | #include <QUrl> |
| 13 | +#include <QVarLengthArray> |
13 | 14 | #include <QXmlStreamReader> |
14 | 15 | #include <QBuffer> |
15 | 16 | #include <QStack> |
@@ -573,6 +574,8 @@ private slots: |
573 | 574 | void readLatin1Document() const; |
574 | 575 | void appendToRawDocumentWithNonUtf8Encoding_data(); |
575 | 576 | void appendToRawDocumentWithNonUtf8Encoding(); |
| 577 | + void appendDifferentEncodingsWithoutXmlProlog_data(); |
| 578 | + void appendDifferentEncodingsWithoutXmlProlog(); |
576 | 579 | void readNextStartElement() const; |
577 | 580 | void readElementText() const; |
578 | 581 | void readElementText_data() const; |
@@ -1328,6 +1331,191 @@ void tst_QXmlStream::appendToRawDocumentWithNonUtf8Encoding() |
1328 | 1331 | QCOMPARE(text, expectedNextElementText); |
1329 | 1332 | } |
1330 | 1333 |
|
| 1334 | +struct DataAndEncoding |
| 1335 | +{ |
| 1336 | + enum Encoding : quint8 { |
| 1337 | + Raw = 0, |
| 1338 | + Latin1, |
| 1339 | + Utf8, |
| 1340 | + Utf16 |
| 1341 | + }; |
| 1342 | + |
| 1343 | + QByteArray data; |
| 1344 | + Encoding encoding; |
| 1345 | + |
| 1346 | + DataAndEncoding(const QByteArray &d, Encoding e) |
| 1347 | + : data(d), encoding(e) |
| 1348 | + {} |
| 1349 | + DataAndEncoding(const QString &str) |
| 1350 | + : data(asUtf16ByteArray(str)), encoding(Encoding::Utf16) |
| 1351 | + {} |
| 1352 | + |
| 1353 | + static QByteArray asUtf16ByteArray(const QString &input) |
| 1354 | + { |
| 1355 | + return QByteArray{reinterpret_cast<const char *>(input.utf16()), input.size() * 2}; |
| 1356 | + } |
| 1357 | + |
| 1358 | + QAnyStringView toAnyStringView() const |
| 1359 | + { |
| 1360 | + switch (encoding) { |
| 1361 | + case Latin1: |
| 1362 | + return QLatin1StringView{data}; |
| 1363 | + case Utf8: |
| 1364 | + return QUtf8StringView{data}; |
| 1365 | + case Utf16: |
| 1366 | + Q_ASSERT(data.size() % 2 == 0); |
| 1367 | + return QStringView{reinterpret_cast<const char16_t *>(data.data()), data.size() / 2}; |
| 1368 | + case Raw: |
| 1369 | + // Impossible to convert to QASV in general case |
| 1370 | + Q_UNREACHABLE_RETURN({}); |
| 1371 | + } |
| 1372 | + } |
| 1373 | + |
| 1374 | + // for next_permutation |
| 1375 | + friend bool operator<(const DataAndEncoding &lhs, const DataAndEncoding &rhs) |
| 1376 | + { |
| 1377 | + return lhs.encoding < rhs.encoding; |
| 1378 | + } |
| 1379 | +}; |
| 1380 | + |
| 1381 | +void tst_QXmlStream::appendDifferentEncodingsWithoutXmlProlog_data() |
| 1382 | +{ |
| 1383 | + QTest::addColumn<QList<DataAndEncoding>>("inputs"); |
| 1384 | + QTest::addColumn<QString>("expectedResult"); |
| 1385 | + |
| 1386 | + const QByteArray u8Str = "ΔΩΘ"; |
| 1387 | + const QByteArray l1Str = "\xC4\xD6\xDC"; // ÄÖÜ |
| 1388 | + const QByteArray rawDataUtf8 = "\xf0\x9f\x98\x82"; // FACE WITH TEARS OF JOY (U+1F602) |
| 1389 | + const QString u16Str = u"\U0001F60E"_s; // SMILING FACE WITH SUNGLASSES (U+1F60E) |
| 1390 | + |
| 1391 | + using Enc = DataAndEncoding::Encoding; |
| 1392 | + |
| 1393 | + QVarLengthArray<DataAndEncoding> inputs{ DataAndEncoding{u8Str, Enc::Utf8}, |
| 1394 | + DataAndEncoding{l1Str, Enc::Latin1}, |
| 1395 | + DataAndEncoding{rawDataUtf8, Enc::Raw}, |
| 1396 | + DataAndEncoding{u16Str} }; |
| 1397 | + |
| 1398 | + // Helper function to populate test data |
| 1399 | + auto encToName = [](Enc e) -> QByteArray { |
| 1400 | + switch (e) { |
| 1401 | + case Enc::Raw: |
| 1402 | + return "bytes"_ba; |
| 1403 | + case Enc::Latin1: |
| 1404 | + return "l1"_ba; |
| 1405 | + case Enc::Utf8: |
| 1406 | + return "u8"_ba; |
| 1407 | + case Enc::Utf16: |
| 1408 | + return "u16"_ba; |
| 1409 | + } |
| 1410 | + Q_UNREACHABLE_RETURN(""); |
| 1411 | + }; |
| 1412 | + auto adjustFirst = [](const DataAndEncoding &input) -> DataAndEncoding { |
| 1413 | + QByteArray newData = input.data; |
| 1414 | + if (input.encoding == Enc::Utf16) |
| 1415 | + newData.prepend(DataAndEncoding::asUtf16ByteArray(u"<a>"_s)); |
| 1416 | + else |
| 1417 | + newData.prepend("<a>"_ba); |
| 1418 | + return {newData, input.encoding}; |
| 1419 | + }; |
| 1420 | + auto adjustLast = [](const DataAndEncoding &input) -> DataAndEncoding { |
| 1421 | + QByteArray newData = input.data; |
| 1422 | + if (input.encoding == Enc::Utf16) |
| 1423 | + newData.append(DataAndEncoding::asUtf16ByteArray(u"</a>"_s)); |
| 1424 | + else |
| 1425 | + newData.append("</a>"_ba); |
| 1426 | + return {newData, input.encoding}; |
| 1427 | + }; |
| 1428 | + auto dataToString = [](const DataAndEncoding &input) -> QString { |
| 1429 | + if (input.encoding == Enc::Raw) { |
| 1430 | + // This function treats raw data as UTF-8 |
| 1431 | + return QString::fromUtf8(input.data); |
| 1432 | + } |
| 1433 | + return input.toAnyStringView().toString(); |
| 1434 | + }; |
| 1435 | + // Iterate over all permutations of the list. |
| 1436 | + // Sort the list first, to cover all cases |
| 1437 | + std::sort(inputs.begin(), inputs.end()); |
| 1438 | + do { |
| 1439 | + const auto lastIdx = inputs.size() - 1; |
| 1440 | + QByteArray testName; |
| 1441 | + QList<DataAndEncoding> inputData; |
| 1442 | + QString expectedResult; |
| 1443 | + for (qsizetype i = 0; i <= lastIdx; ++i) { |
| 1444 | + const auto &item = inputs[i]; |
| 1445 | + testName += encToName(item.encoding); |
| 1446 | + if (i != lastIdx) |
| 1447 | + testName.append('+'); |
| 1448 | + if (i == 0) |
| 1449 | + inputData.append(adjustFirst(item)); |
| 1450 | + else if (i == lastIdx) |
| 1451 | + inputData.append(adjustLast(item)); |
| 1452 | + else |
| 1453 | + inputData.append(item); |
| 1454 | + expectedResult.append(dataToString(item)); |
| 1455 | + } |
| 1456 | + QTest::newRow(testName.constData()) << inputData << expectedResult; |
| 1457 | + } while (std::next_permutation(inputs.begin(), inputs.end())); |
| 1458 | + |
| 1459 | + // plus add some corner cases |
| 1460 | + |
| 1461 | + QTest::newRow("u8+bytes_FACE_WITH_TEARS_OF_JOY") |
| 1462 | + << QList{ DataAndEncoding{"<a>\xf0\x9f"_ba, Enc::Utf8}, |
| 1463 | + DataAndEncoding{"\x98\x82</a>"_ba, Enc::Raw} } |
| 1464 | + << u"\U0001F602"_s; |
| 1465 | + |
| 1466 | + // The test tries to read FACE IN CLOUDS emoji. |
| 1467 | + // Its full representation is: |
| 1468 | + // - FACE WITHOUT MOUTH: U+1F636 or \xf0\x9f\x98\xb6; |
| 1469 | + // - ZERO WIDTH JOINER: U+200D or \xe2\x80\x8d; |
| 1470 | + // - FOG: U+1F32B or \xf0\x9f\x8c\xab; |
| 1471 | + // - VARIATION SELECTOR-16: U+FE0F or \xef\xb8\x8f. |
| 1472 | + // This test tries to encode a part of it as UTF-8, and the rest as UTF-16. |
| 1473 | + // Important is that we need to break at the borders of the characters |
| 1474 | + QTest::newRow("u8+u16_FACE_IN_CLOUDS") |
| 1475 | + << QList{ DataAndEncoding{"<a>\xf0\x9f\x98\xb6\xe2\x80\x8d"_ba, Enc::Utf8}, |
| 1476 | + DataAndEncoding{u"\U0001F32B\uFE0F</a>"_s} } |
| 1477 | + << u"\U0001F636\u200D\U0001F32B\uFE0F"_s; |
| 1478 | +} |
| 1479 | + |
| 1480 | +void tst_QXmlStream::appendDifferentEncodingsWithoutXmlProlog() |
| 1481 | +{ |
| 1482 | + QFETCH(const QList<DataAndEncoding>, inputs); |
| 1483 | + QFETCH(const QString, expectedResult); |
| 1484 | + |
| 1485 | + { |
| 1486 | + QXmlStreamReader reader; |
| 1487 | + for (const auto &data : inputs) { |
| 1488 | + if (data.encoding == DataAndEncoding::Raw) |
| 1489 | + reader.addData(data.data); |
| 1490 | + else |
| 1491 | + reader.addData(data.toAnyStringView()); |
| 1492 | + } |
| 1493 | + QVERIFY(reader.readNextStartElement()); |
| 1494 | + const QString text = reader.readElementText(); |
| 1495 | + QCOMPARE(text, expectedResult); |
| 1496 | + } |
| 1497 | + // same with c-tor |
| 1498 | + { |
| 1499 | + std::unique_ptr<QXmlStreamReader> reader = nullptr; |
| 1500 | + for (const auto &data : inputs) { |
| 1501 | + if (!reader) { |
| 1502 | + if (data.encoding == DataAndEncoding::Raw) |
| 1503 | + reader = std::make_unique<QXmlStreamReader>(data.data); |
| 1504 | + else |
| 1505 | + reader = std::make_unique<QXmlStreamReader>(data.toAnyStringView()); |
| 1506 | + } else { |
| 1507 | + if (data.encoding == DataAndEncoding::Raw) |
| 1508 | + reader->addData(data.data); |
| 1509 | + else |
| 1510 | + reader->addData(data.toAnyStringView()); |
| 1511 | + } |
| 1512 | + } |
| 1513 | + QVERIFY(reader->readNextStartElement()); |
| 1514 | + const QString text = reader->readElementText(); |
| 1515 | + QCOMPARE(text, expectedResult); |
| 1516 | + } |
| 1517 | +} |
| 1518 | + |
1331 | 1519 | void tst_QXmlStream::readNextStartElement() const |
1332 | 1520 | { |
1333 | 1521 | QLatin1String in("<?xml version=\"1.0\"?><A><!-- blah --><B><C/></B><B attr=\"value\"/>text</A>"); |
|
0 commit comments