|
5 | 5 | #include "Row.hpp" |
6 | 6 |
|
7 | 7 | #include <isa_availability.h> |
8 | | -#include <til/unicode.h> |
9 | 8 |
|
10 | | -#include "textBuffer.hpp" |
11 | | -#include "../../types/inc/GlyphWidth.hpp" |
| 9 | +#include "../../types/inc/CodepointWidthDetector.hpp" |
12 | 10 |
|
13 | 11 | // It would be nice to add checked array access in the future, but it's a little annoying to do so without impacting |
14 | 12 | // performance (including Debug performance). Other languages are a little bit more ergonomic there than C++. |
@@ -568,6 +566,7 @@ void ROW::ReplaceAttributes(const til::CoordType beginIndex, const til::CoordTyp |
568 | 566 | void ROW::ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars) |
569 | 567 | try |
570 | 568 | { |
| 569 | + assert(width >= 1 && width <= 2); |
571 | 570 | WriteHelper h{ *this, columnBegin, _columnCount, chars }; |
572 | 571 | if (!h.IsValid()) |
573 | 572 | { |
@@ -666,56 +665,89 @@ catch (...) |
666 | 665 |
|
667 | 666 | [[msvc::forceinline]] void ROW::WriteHelper::_replaceTextUnicode(size_t ch, std::wstring_view::const_iterator it) noexcept |
668 | 667 | { |
669 | | - const auto end = chars.end(); |
| 668 | + auto& cwd = CodepointWidthDetector::Singleton(); |
670 | 669 |
|
671 | | - while (it != end) |
| 670 | + // Check if the new text joins with the existing contents of the row to form a single grapheme cluster. |
| 671 | + if (it == chars.begin()) |
672 | 672 | { |
673 | | - unsigned int width = 1; |
674 | | - auto ptr = &*it; |
675 | | - const auto wch = *ptr; |
676 | | - size_t advance = 1; |
| 673 | + auto colPrev = colBeg; |
| 674 | + while (colPrev > 0 && row._uncheckedIsTrailer(--colPrev)) |
| 675 | + { |
| 676 | + } |
677 | 677 |
|
678 | | - ++it; |
| 678 | + const auto chPrev = row._uncheckedCharOffset(colPrev); |
| 679 | + const std::wstring_view charsPrev{ row._chars.data() + chPrev, ch - chPrev }; |
679 | 680 |
|
680 | | - // Even in our slow-path we can avoid calling IsGlyphFullWidth if the current character is ASCII. |
681 | | - // It also allows us to skip the surrogate pair decoding at the same time. |
682 | | - if (wch >= 0x80) |
| 681 | + GraphemeState state; |
| 682 | + cwd.GraphemeNext(state, charsPrev); |
| 683 | + cwd.GraphemeNext(state, chars); |
| 684 | + |
| 685 | + if (state.len > 0) |
683 | 686 | { |
684 | | - if (til::is_surrogate(wch)) |
| 687 | + colBegDirty = colPrev; |
| 688 | + colEnd = colPrev; |
| 689 | + |
| 690 | + const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + state.width); |
| 691 | + if (colEndNew > colLimit) |
685 | 692 | { |
686 | | - if (it != end && til::is_leading_surrogate(wch) && til::is_trailing_surrogate(*it)) |
687 | | - { |
688 | | - advance = 2; |
689 | | - ++it; |
690 | | - } |
691 | | - else |
692 | | - { |
693 | | - ptr = &UNICODE_REPLACEMENT; |
694 | | - } |
| 693 | + colEndDirty = colLimit; |
| 694 | + charsConsumed = ch - chBeg; |
| 695 | + return; |
695 | 696 | } |
696 | 697 |
|
697 | | - width = IsGlyphFullWidth({ ptr, advance }) + 1u; |
698 | | - } |
| 698 | + // Fill our char-offset buffer with 1 entry containing the mapping from the |
| 699 | + // current column (colEnd) to the start of the glyph in the string (ch)... |
| 700 | + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(chPrev); |
| 701 | + // ...followed by 0-N entries containing an indication that the |
| 702 | + // columns are just a wide-glyph extension of the preceding one. |
| 703 | + while (colEnd < colEndNew) |
| 704 | + { |
| 705 | + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(chPrev | CharOffsetsTrailer); |
| 706 | + } |
699 | 707 |
|
700 | | - const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + width); |
701 | | - if (colEndNew > colLimit) |
702 | | - { |
703 | | - colEndDirty = colLimit; |
704 | | - charsConsumed = ch - chBeg; |
705 | | - return; |
| 708 | + ch += state.len; |
| 709 | + it += state.len; |
706 | 710 | } |
| 711 | + } |
| 712 | + else |
| 713 | + { |
| 714 | + // The non-ASCII character we have encountered may be a combining mark, like "a^" which is then displayed as "â". |
| 715 | + // In order to recognize both characters as a single grapheme, we need to back up by 1 ASCII character |
| 716 | + // and let GraphemeNext() find the next proper grapheme boundary. |
| 717 | + --colEnd; |
| 718 | + --ch; |
| 719 | + --it; |
| 720 | + } |
| 721 | + |
| 722 | + if (const auto end = chars.end(); it != end) |
| 723 | + { |
| 724 | + GraphemeState state{ .beg = &*it }; |
707 | 725 |
|
708 | | - // Fill our char-offset buffer with 1 entry containing the mapping from the |
709 | | - // current column (colEnd) to the start of the glyph in the string (ch)... |
710 | | - til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch); |
711 | | - // ...followed by 0-N entries containing an indication that the |
712 | | - // columns are just a wide-glyph extension of the preceding one. |
713 | | - while (colEnd < colEndNew) |
| 726 | + do |
714 | 727 | { |
715 | | - til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch | CharOffsetsTrailer); |
716 | | - } |
| 728 | + cwd.GraphemeNext(state, chars); |
| 729 | + |
| 730 | + const auto colEndNew = gsl::narrow_cast<uint16_t>(colEnd + state.width); |
| 731 | + if (colEndNew > colLimit) |
| 732 | + { |
| 733 | + colEndDirty = colLimit; |
| 734 | + charsConsumed = ch - chBeg; |
| 735 | + return; |
| 736 | + } |
| 737 | + |
| 738 | + // Fill our char-offset buffer with 1 entry containing the mapping from the |
| 739 | + // current column (colEnd) to the start of the glyph in the string (ch)... |
| 740 | + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch); |
| 741 | + // ...followed by 0-N entries containing an indication that the |
| 742 | + // columns are just a wide-glyph extension of the preceding one. |
| 743 | + while (colEnd < colEndNew) |
| 744 | + { |
| 745 | + til::at(row._charOffsets, colEnd++) = gsl::narrow_cast<uint16_t>(ch | CharOffsetsTrailer); |
| 746 | + } |
717 | 747 |
|
718 | | - ch += advance; |
| 748 | + ch += state.len; |
| 749 | + it += state.len; |
| 750 | + } while (it != end); |
719 | 751 | } |
720 | 752 |
|
721 | 753 | colEndDirty = colEnd; |
@@ -1058,7 +1090,7 @@ std::wstring_view ROW::GetText() const noexcept |
1058 | 1090 |
|
1059 | 1091 | std::wstring_view ROW::GetText(til::CoordType columnBegin, til::CoordType columnEnd) const noexcept |
1060 | 1092 | { |
1061 | | - const til::CoordType columns = _columnCount; |
| 1093 | + const auto columns = GetReadableColumnCount(); |
1062 | 1094 | const auto colBeg = clamp(columnBegin, 0, columns); |
1063 | 1095 | const auto colEnd = clamp(columnEnd, colBeg, columns); |
1064 | 1096 | const size_t chBeg = _uncheckedCharOffset(gsl::narrow_cast<size_t>(colBeg)); |
|
0 commit comments