@@ -68,7 +68,144 @@ static void contpte_convert(struct mm_struct *mm, unsigned long addr,
6868 pte = pte_mkyoung (pte );
6969 }
7070
71- __flush_tlb_range (& vma , start_addr , addr , PAGE_SIZE , true, 3 );
71+ /*
72+	 * On eliding the __flush_tlb_range() under BBML2+noabort:
73+ *
74+ * NOTE: Instead of using N=16 as the contiguous block length, we use
75+ * N=4 for clarity.
76+ *
77+ * NOTE: 'n' and 'c' are used to denote the "contiguous bit" being
78+ * unset and set, respectively.
79+ *
80+ * We worry about two cases where contiguous bit is used:
81+ * - When folding N smaller non-contiguous ptes as 1 contiguous block.
82+ * - When unfolding a contiguous block into N smaller non-contiguous ptes.
83+ *
84+ * Currently, the BBML0 folding case looks as follows:
85+ *
86+ * 0) Initial page-table layout:
87+ *
88+ * +----+----+----+----+
89+ * |RO,n|RO,n|RO,n|RW,n| <--- last page being set as RO
90+ * +----+----+----+----+
91+ *
92+ * 1) Aggregate AF + dirty flags using __ptep_get_and_clear():
93+ *
94+ * +----+----+----+----+
95+ * | 0 | 0 | 0 | 0 |
96+ * +----+----+----+----+
97+ *
98+ * 2) __flush_tlb_range():
99+ *
100+ * |____ tlbi + dsb ____|
101+ *
102+ * 3) __set_ptes() to repaint contiguous block:
103+ *
104+ * +----+----+----+----+
105+ * |RO,c|RO,c|RO,c|RO,c|
106+ * +----+----+----+----+
107+ *
108+ * 4) The kernel will eventually __flush_tlb() for changed page:
109+ *
110+ * |____| <--- tlbi + dsb
111+ *
112+ * As expected, the intermediate tlbi+dsb ensures that other PEs
113+ * only ever see an invalid (0) entry, or the new contiguous TLB entry.
114+ * The final tlbi+dsb will always throw away the newly installed
115+ * contiguous TLB entry, which is a micro-optimisation opportunity,
116+ * but does not affect correctness.
117+ *
118+ * In the BBML2 case, the change is avoiding the intermediate tlbi+dsb.
119+ * This means a few things, but notably other PEs will still "see" any
120+ * stale cached TLB entries. This could lead to a "contiguous bit
121+ * misprogramming" issue until the final tlbi+dsb of the changed page,
122+ * which would clear out both the stale (RW,n) entry and the new (RO,c)
123+ * contiguous entry installed in its place.
124+ *
125+	 * What this is saying is the following:
126+ *
127+ * +----+----+----+----+
128+ * |RO,n|RO,n|RO,n|RW,n| <--- old page tables, all non-contiguous
129+ * +----+----+----+----+
130+ *
131+ * +----+----+----+----+
132+ * |RO,c|RO,c|RO,c|RO,c| <--- new page tables, all contiguous
133+ * +----+----+----+----+
134+ * /\
135+ * ||
136+ *
137+ * If both the old single (RW,n) and new contiguous (RO,c) TLB entries
138+ * are present, and a write is made to this address, do we fault or
139+ * is the write permitted (via amalgamation)?
140+ *
141+ * The relevant Arm ARM DDI 0487L.a requirements are RNGLXZ and RJQQTC,
142+ * and together state that when BBML1 or BBML2 are implemented, either
143+ * a TLB conflict abort is raised (which we expressly forbid), or will
144+ * "produce an OA, access permissions, and memory attributes that are
145+ * consistent with any of the programmed translation table values".
146+ *
147+ * That is to say, will either raise a TLB conflict, or produce one of
148+ * the cached TLB entries, but never amalgamate.
149+ *
150+ * Thus, as the page tables are only considered "consistent" after
151+ * the final tlbi+dsb (which evicts both the single stale (RW,n) TLB
152+ * entry as well as the new contiguous (RO,c) TLB entry), omitting the
153+ * initial tlbi+dsb is correct.
154+ *
155+ * It is also important to note that at the end of the BBML2 folding
156+ * case, we are still left with potentially all N TLB entries still
157+ * cached (the N-1 non-contiguous ptes, and the single contiguous
158+ * block). However, over time, natural TLB pressure will cause the
159+ * non-contiguous pte TLB entries to be flushed, leaving only the
160+ * contiguous block TLB entry. This means that omitting the tlbi+dsb is
161+ * not only correct, but also keeps our eventual performance benefits.
162+ *
163+ * For the unfolding case, BBML0 looks as follows:
164+ *
165+ * 0) Initial page-table layout:
166+ *
167+ * +----+----+----+----+
168+ * |RW,c|RW,c|RW,c|RW,c| <--- last page being set as RO
169+ * +----+----+----+----+
170+ *
171+ * 1) Aggregate AF + dirty flags using __ptep_get_and_clear():
172+ *
173+ * +----+----+----+----+
174+ * | 0 | 0 | 0 | 0 |
175+ * +----+----+----+----+
176+ *
177+ * 2) __flush_tlb_range():
178+ *
179+ * |____ tlbi + dsb ____|
180+ *
181+ * 3) __set_ptes() to repaint as non-contiguous:
182+ *
183+ * +----+----+----+----+
184+ * |RW,n|RW,n|RW,n|RW,n|
185+ * +----+----+----+----+
186+ *
187+ * 4) Update changed page permissions:
188+ *
189+ * +----+----+----+----+
190+ * |RW,n|RW,n|RW,n|RO,n| <--- last page permissions set
191+ * +----+----+----+----+
192+ *
193+ * 5) The kernel will eventually __flush_tlb() for changed page:
194+ *
195+ * |____| <--- tlbi + dsb
196+ *
197+ * For BBML2, we again remove the intermediate tlbi+dsb. Here, there
198+ * are no issues, as the final tlbi+dsb covering the changed page is
199+ * guaranteed to remove the original large contiguous (RW,c) TLB entry,
200+ * as well as the intermediate (RW,n) TLB entry; the next access will
201+ * install the new (RO,n) TLB entry and the page tables are only
202+ * considered "consistent" after the final tlbi+dsb, so software must
203+ * be prepared for this inconsistency prior to finishing the mm dance
204+ * regardless.
205+ */
206+
207+ if (!system_supports_bbml2_noabort ())
208+ __flush_tlb_range (& vma , start_addr , addr , PAGE_SIZE , true, 3 );
72209
73210 __set_ptes (mm , start_addr , start_ptep , pte , CONT_PTES );
74211}
0 commit comments