| Title | Performance improvements |
|---|---|
| Author | rodarima |
| Created | Sun, 13 Apr 2025 16:52:07 +0000 |
| State | open |
Rendering https://html.spec.whatwg.org/ several times (by refreshing the page) produces the following `perf report` profile on armv7:
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 169K of event 'cycles:Pu'
# Event count (approx.): 33718562150
#
# Overhead Command Shared Object Symbol
# ........ ....... ....................... ................................................................................................................................................
#
7.03% dillo dillo [.] lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const
6.34% dillo ld-musl-armhf.so.1 [.] __strchrnul
2.69% dillo dillo [.] Html_write_raw(DilloHtml*, char*, int, int)
2.22% dillo ld-musl-armhf.so.1 [.] __libc_malloc_impl
1.91% dillo ld-musl-armhf.so.1 [.] strncmp
1.86% dillo dillo [.] lout::misc::NotSoSimpleVector<dw::Textblock::Word>::size() const
1.78% dillo dillo [.] dw::Textblock::accumulateWordData(int)
1.64% dillo libz.so.1.3.1 [.] crc32_z
1.51% dillo ld-musl-armhf.so.1 [.] get_meta
1.45% dillo dillo [.] CssStyleSheet::apply(CssPropertyList*, Doctree*, DoctreeNode const*, MatchCache*) const
1.37% dillo dillo [.] int lout::misc::max<int>(int, int)
1.29% dillo dillo [.] dw::Textblock::BadnessAndPenalty::penaltyValue(int, int)
1.29% dillo ld-musl-armhf.so.1 [.] strcspn
1.22% dillo ld-musl-armhf.so.1 [.] __libc_free
1.20% dillo dillo [.] dw::Textblock::BadnessAndPenalty::badnessValue(int)
1.17% dillo dillo [.] lout::identity::IdentifiableObject::instanceOf(int)
1.16% dillo dillo [.] dw::Textblock::addText(char const*, unsigned int, dw::core::style::Style*)
1.16% dillo dillo [.] dw::Textblock::wrapWordInFlow(int, bool)
1.09% dillo dillo [.] a_Html_get_attr(DilloHtml*, char const*, int, char const*)
1.03% dillo dillo [.] dw::Textblock::getWidgetRegardingBorderForLine(int, int)
1.00% dillo dillo [.] CssSelector::match(Doctree*, DoctreeNode const*, int, CssSelector::Combinator, MatchCache*)
0.92% dillo ld-musl-armhf.so.1 [.] cached_aligned32
0.89% dillo dillo [.] dw::Textblock::getLineStretchability(int)
0.86% dillo ld-musl-armhf.so.1 [.] strlen
0.85% dillo dillo [.] dw::core::Widget::getStyle()
0.85% dillo dillo [.] dw::Textblock::calcBorders(int, int)
0.83% dillo dillo [.] dw::Textblock::handleWordExtremes(int)
0.80% dillo ld-musl-armhf.so.1 [.] alloc_slot
0.79% dillo dillo [.] dw::Textblock::BadnessAndPenalty::compareTo(int, dw::Textblock::BadnessAndPenalty*)
0.78% dillo dillo [.] lout::misc::SimpleVector<dw::Textblock::Line>::size() const
0.77% dillo dillo [.] lout::container::untyped::Vector::get(int) const
0.77% dillo dillo [.] lout::object::ConstString::hashValue(char const*)
0.66% dillo libgcc_s.so.1 [.] __aeabi_idiv
0.66% dillo dillo [.] CssSimpleSelector::match(DoctreeNode const*)
0.64% dillo dillo [.] lout::container::untyped::HashSet::findNode(lout::object::Object*) const
0.63% dillo dillo [.] lout::misc::SimpleVector<dw::Textblock::Line>::getRef(int) const
There is a bottleneck in `getRef` of `NotSoSimpleVector`, which accounts for 7.03% of the samples. We could probably optimize its hot path, as it performs several checks that are not really needed, including two asserts.
│ 0005f7fc <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const>: ▒
│ /** ▒
│ * \brief Return the reference of one element. ▒
│ * ▒
│ * \sa misc::SimpleVector::get ▒
│ */ ▒
│ inline T* getRef (int i) const ▒
2.72 │ push {r7, lr} ▒
1.83 │ sub sp, #8 ▒
2.42 │ add r7, sp, #0 ▒
5.87 │ str r0, [r7, #4] ▒
1.99 │ str r1, [r7, #0] ▒
│ { ▒
│ if (this->startExtra == -1) { ▒
2.01 │ ldr r3, [r7, #4] ▒
5.73 │ ldr r3, [r3, #28] ▒
3.61 │ cmp.w r3, #4294967295 ▒
│ bne.n 5f84e <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const+0x52> ▒
│ assert (i >= 0 && i < this->numMain); ▒
5.79 │ ldr r3, [r7, #0] ▒
3.48 │ cmp r3, #0 ▒
0.34 │ blt.n 5f820 <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const+0x24> ▒
5.49 │ ldr r3, [r7, #4] ▒
5.47 │ ldr r3, [r3, #12] ▒
1.67 │ ldr r2, [r7, #0] ▒
3.40 │ cmp r2, r3 ▒
│ blt.n 5f834 <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const+0x38> ▒
│ ldr r3, [pc, #228] @ (5f908 <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const+0x10c>) ▒
│ add r3, pc ▒
│ movw r2, #479 @ 0x1df ▒
│ ldr r1, [pc, #224] @ (5f90c <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const+0x110>) ▒
│ add r1, pc ▒
│ ldr r0, [pc, #224] @ (5f910 <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const+0x114>) ▒
│ add r0, pc ▒
│ → blx strrchr@plt ▒
│ return this->arrayMain + i; ▒
5.98 │ ldr r3, [r7, #4] ▒
5.65 │ ldr r1, [r3, #0] ▒
1.80 │ ldr r2, [r7, #0] ▒
3.96 │ mov r3, r2 ▒
4.10 │ lsls r3, r3, #1 ▒
1.86 │ add r3, r2 ▒
3.54 │ lsls r2, r3, #3 ▒
1.68 │ subs r2, r2, r3 ▒
3.85 │ lsls r3, r2, #2 ▒
2.06 │ mov r2, r3 ▒
1.84 │ mov r3, r2 ▒
2.02 │ add r3, r1 ▒
│ b.n 5f8fe <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const+0x102> ▒
│ } else { ▒
│ if (i < this->startExtra) { ▒
│ ldr r3, [r7, #4] ▒
│ ldr r3, [r3, #28] ▒
│ ldr r2, [r7, #0] ▒
│ cmp r2, r3 ▒
│ bge.n 5f88c <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const+0x90> ▒
│ assert (i >= 0); ▒
│ ldr r3, [r7, #0] ▒
│ cmp r3, #0 ▒
│ bge.n 5f872 <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const+0x76> ▒
│ ldr r3, [pc, #180] @ (5f914 <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const+0x118>) ▒
│ add r3, pc ▒
│ movw r2, #483 @ 0x1e3 ▒
│ ldr r1, [pc, #176] @ (5f918 <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const+0x11c>) ▒
│ add r1, pc ▒
│ ldr r0, [pc, #176] @ (5f91c <lout::misc::NotSoSimpleVector<dw::Textblock::Word>::getRef(int) const+0x120>) ▒
│ add r0, pc ▒
│ → blx strrchr@plt
...