@@ -286,6 +286,41 @@ static unsigned long dbuf_metadata_cache_target_bytes(void);
286286static uint_t dbuf_cache_hiwater_pct = 10 ;
287287static uint_t dbuf_cache_lowater_pct = 10 ;
288288
289+ void
290+ assert_db_data_addr_locked (const dmu_buf_impl_t * db )
291+ {
292+ if (db -> db_level > 0 )
293+ return ;
294+ else if (db -> db .db_object == DMU_META_DNODE_OBJECT )
295+ return ;
296+ ASSERT (MUTEX_HELD (& db -> db_mtx ));
297+ }
298+
299+ void
300+ #ifdef __linux__
301+ assert_db_data_contents_locked (dmu_buf_impl_t * db , boolean_t writer )
302+ #else
303+ assert_db_data_contents_locked (const dmu_buf_impl_t * db , boolean_t writer )
304+ #endif
305+ {
306+ /*
307+ * db_rwlock protects indirect blocks and the data block of the meta
308+ * dnode.
309+ */
310+ if (db -> db_blkid == DMU_BONUS_BLKID || db -> db_blkid == DMU_SPILL_BLKID )
311+ return ;
312+ if (db -> db_dirtycnt == 0 )
313+ return ;
314+ else if (db -> db_level == 0 )
315+ return ;
316+ else if (db -> db .db_object != DMU_META_DNODE_OBJECT )
317+ return ;
318+ if (writer )
319+ ASSERT (RW_WRITE_HELD (& db -> db_rwlock ));
320+ else
321+ ASSERT (RW_LOCK_HELD (& db -> db_rwlock ));
322+ }
323+
289324static int
290325dbuf_cons (void * vdb , void * unused , int kmflag )
291326{
@@ -1215,13 +1250,16 @@ dbuf_verify(dmu_buf_impl_t *db)
12151250 */
12161251 if (db -> db_dirtycnt == 0 ) {
12171252 if (db -> db_level == 0 ) {
1218- uint64_t * buf = db -> db . db_data ;
1253+ uint64_t * buf ;
12191254 int i ;
12201255
1256+ assert_db_data_contents_locked (db , FALSE);
1257+ buf = db -> db .db_data ;
12211258 for (i = 0 ; i < db -> db .db_size >> 3 ; i ++ ) {
12221259 ASSERT0 (buf [i ]);
12231260 }
12241261 } else {
1262+ assert_db_data_contents_locked (db , FALSE);
12251263 blkptr_t * bps = db -> db .db_data ;
12261264 ASSERT3U (1 << DB_DNODE (db )-> dn_indblkshift , = = ,
12271265 db -> db .db_size );
@@ -1703,6 +1741,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
17031741 int bonuslen = DN_SLOTS_TO_BONUSLEN (dn -> dn_num_slots );
17041742 dr -> dt .dl .dr_data = kmem_alloc (bonuslen , KM_SLEEP );
17051743 arc_space_consume (bonuslen , ARC_SPACE_BONUS );
1744+ assert_db_data_contents_locked (db , FALSE);
17061745 memcpy (dr -> dt .dl .dr_data , db -> db .db_data , bonuslen );
17071746 } else if (zfs_refcount_count (& db -> db_holds ) > db -> db_dirtycnt ) {
17081747 dnode_t * dn = DB_DNODE (db );
@@ -1733,6 +1772,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
17331772 } else {
17341773 dr -> dt .dl .dr_data = arc_alloc_buf (spa , db , type , size );
17351774 }
1775+ assert_db_data_contents_locked (db , FALSE);
17361776 memcpy (dr -> dt .dl .dr_data -> b_data , db -> db .db_data , size );
17371777 } else {
17381778 db -> db_buf = NULL ;
@@ -3023,6 +3063,7 @@ dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx, boolean_t failed)
30233063 ASSERT (db -> db_blkid != DMU_BONUS_BLKID );
30243064 /* we were freed while filling */
30253065 /* XXX dbuf_undirty? */
3066+ assert_db_data_contents_locked (db , TRUE);
30263067 memset (db -> db .db_data , 0 , db -> db .db_size );
30273068 db -> db_freed_in_flight = FALSE;
30283069 db -> db_state = DB_CACHED ;
@@ -3155,6 +3196,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx,
31553196 ASSERT (!arc_is_encrypted (buf ));
31563197 mutex_exit (& db -> db_mtx );
31573198 (void ) dbuf_dirty (db , tx );
3199+ assert_db_data_contents_locked (db , TRUE);
31583200 memcpy (db -> db .db_data , buf -> b_data , db -> db .db_size );
31593201 arc_buf_destroy (buf , db );
31603202 return ;
@@ -3398,6 +3440,7 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
33983440 * parentp = NULL ;
33993441 return (err );
34003442 }
3443+ assert_db_data_addr_locked (* parentp );
34013444 * bpp = ((blkptr_t * )(* parentp )-> db .db_data ) +
34023445 (blkid & ((1ULL << epbs ) - 1 ));
34033446 return (0 );
@@ -4584,10 +4627,12 @@ dbuf_lightweight_bp(dbuf_dirty_record_t *dr)
45844627 return (& dn -> dn_phys -> dn_blkptr [dr -> dt .dll .dr_blkid ]);
45854628 } else {
45864629 dmu_buf_impl_t * parent_db = dr -> dr_parent -> dr_dbuf ;
4630+ assert_db_data_addr_locked (parent_db );
45874631 int epbs = dn -> dn_indblkshift - SPA_BLKPTRSHIFT ;
45884632 VERIFY3U (parent_db -> db_level , = = , 1 );
45894633 VERIFY3P (DB_DNODE (parent_db ), = = , dn );
45904634 VERIFY3U (dr -> dt .dll .dr_blkid >> epbs , = = , parent_db -> db_blkid );
4635+ assert_db_data_contents_locked (parent_db , FALSE);
45914636 blkptr_t * bp = parent_db -> db .db_data ;
45924637 return (& bp [dr -> dt .dll .dr_blkid & ((1 << epbs ) - 1 )]);
45934638 }
@@ -4598,12 +4643,22 @@ dbuf_lightweight_ready(zio_t *zio)
45984643{
45994644 dbuf_dirty_record_t * dr = zio -> io_private ;
46004645 blkptr_t * bp = zio -> io_bp ;
4646+ dmu_buf_impl_t * parent_db = NULL ;
46014647
46024648 if (zio -> io_error != 0 )
46034649 return ;
46044650
46054651 dnode_t * dn = dr -> dr_dnode ;
46064652
4653+ EQUIV (dr -> dr_parent == NULL , dn -> dn_phys -> dn_nlevels == 1 );
4654+ if (dr -> dr_parent == NULL ) {
4655+ parent_db = dn -> dn_dbuf ;
4656+ } else {
4657+ parent_db = dr -> dr_parent -> dr_dbuf ;
4658+ }
4659+
4660+ assert_db_data_addr_locked (parent_db );
4661+ rw_enter (& parent_db -> db_rwlock , RW_WRITER );
46074662 blkptr_t * bp_orig = dbuf_lightweight_bp (dr );
46084663 spa_t * spa = dmu_objset_spa (dn -> dn_objset );
46094664 int64_t delta = bp_get_dsize_sync (spa , bp ) -
@@ -4623,14 +4678,6 @@ dbuf_lightweight_ready(zio_t *zio)
46234678 BP_SET_FILL (bp , fill );
46244679 }
46254680
4626- dmu_buf_impl_t * parent_db ;
4627- EQUIV (dr -> dr_parent == NULL , dn -> dn_phys -> dn_nlevels == 1 );
4628- if (dr -> dr_parent == NULL ) {
4629- parent_db = dn -> dn_dbuf ;
4630- } else {
4631- parent_db = dr -> dr_parent -> dr_dbuf ;
4632- }
4633- rw_enter (& parent_db -> db_rwlock , RW_WRITER );
46344681 * bp_orig = * bp ;
46354682 rw_exit (& parent_db -> db_rwlock );
46364683}
@@ -4664,6 +4711,7 @@ noinline static void
46644711dbuf_sync_lightweight (dbuf_dirty_record_t * dr , dmu_tx_t * tx )
46654712{
46664713 dnode_t * dn = dr -> dr_dnode ;
4714+ dmu_buf_impl_t * parent_db = NULL ;
46674715 zio_t * pio ;
46684716 if (dn -> dn_phys -> dn_nlevels == 1 ) {
46694717 pio = dn -> dn_zio ;
@@ -4682,6 +4730,11 @@ dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
46824730 * See comment in dbuf_write(). This is so that zio->io_bp_orig
46834731 * will have the old BP in dbuf_lightweight_done().
46844732 */
4733+ if (dr -> dr_dnode -> dn_phys -> dn_nlevels != 1 ) {
4734+ parent_db = dr -> dr_parent -> dr_dbuf ;
4735+ assert_db_data_addr_locked (parent_db );
4736+ rw_enter (& parent_db -> db_rwlock , RW_READER );
4737+ }
46854738 dr -> dr_bp_copy = * dbuf_lightweight_bp (dr );
46864739
46874740 dr -> dr_zio = zio_write (pio , dmu_objset_spa (dn -> dn_objset ),
@@ -4691,6 +4744,9 @@ dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
46914744 dbuf_lightweight_done , dr , ZIO_PRIORITY_ASYNC_WRITE ,
46924745 ZIO_FLAG_MUSTSUCCEED | dr -> dt .dll .dr_flags , & zb );
46934746
4747+ if (parent_db )
4748+ rw_exit (& parent_db -> db_rwlock );
4749+
46944750 zio_nowait (dr -> dr_zio );
46954751}
46964752
@@ -4847,6 +4903,7 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
48474903 } else {
48484904 * datap = arc_alloc_buf (os -> os_spa , db , type , psize );
48494905 }
4906+ assert_db_data_contents_locked (db , FALSE);
48504907 memcpy ((* datap )-> b_data , db -> db .db_data , psize );
48514908 }
48524909 db -> db_data_pending = dr ;
@@ -4953,6 +5010,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
49535010
49545011 if (dn -> dn_type == DMU_OT_DNODE ) {
49555012 i = 0 ;
5013+ rw_enter (& db -> db_rwlock , RW_READER );
49565014 while (i < db -> db .db_size ) {
49575015 dnode_phys_t * dnp =
49585016 (void * )(((char * )db -> db .db_data ) + i );
@@ -4978,6 +5036,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
49785036 DNODE_MIN_SIZE ;
49795037 }
49805038 }
5039+ rw_exit (& db -> db_rwlock );
49815040 } else {
49825041 if (BP_IS_HOLE (bp )) {
49835042 fill = 0 ;
@@ -4986,6 +5045,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
49865045 }
49875046 }
49885047 } else {
5048+ rw_enter (& db -> db_rwlock , RW_READER );
49895049 blkptr_t * ibp = db -> db .db_data ;
49905050 ASSERT3U (db -> db .db_size , = = , 1 <<dn -> dn_phys -> dn_indblkshift );
49915051 for (i = db -> db .db_size >> SPA_BLKPTRSHIFT ; i > 0 ; i -- , ibp ++ ) {
@@ -4995,6 +5055,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
49955055 BLK_CONFIG_SKIP , BLK_VERIFY_HALT );
49965056 fill += BP_GET_FILL (ibp );
49975057 }
5058+ rw_exit (& db -> db_rwlock );
49985059 }
49995060 DB_DNODE_EXIT (db );
50005061
@@ -5029,6 +5090,8 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
50295090 DB_DNODE_EXIT (db );
50305091 ASSERT3U (epbs , < , 31 );
50315092
5093+ assert_db_data_addr_locked (db );
5094+ rw_enter (& db -> db_rwlock , RW_READER );
50325095 /* Determine if all our children are holes */
50335096 for (i = 0 , bp = db -> db .db_data ; i < 1ULL << epbs ; i ++ , bp ++ ) {
50345097 if (!BP_IS_HOLE (bp ))
@@ -5045,10 +5108,13 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
50455108 * anybody from reading the blocks we're about to
50465109 * zero out.
50475110 */
5048- rw_enter (& db -> db_rwlock , RW_WRITER );
5111+ if (!rw_tryupgrade (& db -> db_rwlock )) {
5112+ rw_exit (& db -> db_rwlock );
5113+ rw_enter (& db -> db_rwlock , RW_WRITER );
5114+ }
50495115 memset (db -> db .db_data , 0 , db -> db .db_size );
5050- rw_exit (& db -> db_rwlock );
50515116 }
5117+ rw_exit (& db -> db_rwlock );
50525118}
50535119
50545120static void
@@ -5243,11 +5309,11 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
52435309 * avoid lock contention, only grab it when we are actually
52445310 * changing the BP.
52455311 */
5246- if (rw != NULL )
5312+ if (rw != NULL && !rw_tryupgrade (rw )) {
5313+ rw_exit (rw );
52475314 rw_enter (rw , RW_WRITER );
5315+ }
52485316 * bp = bp_copy ;
5249- if (rw != NULL )
5250- rw_exit (rw );
52515317 }
52525318}
52535319
@@ -5263,6 +5329,8 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
52635329 if (!spa_feature_is_active (spa , SPA_FEATURE_DEVICE_REMOVAL ))
52645330 return ;
52655331
5332+ assert_db_data_addr_locked (db );
5333+ rw_enter (& db -> db_rwlock , RW_READER );
52665334 if (db -> db_level > 0 ) {
52675335 blkptr_t * bp = db -> db .db_data ;
52685336 for (int i = 0 ; i < db -> db .db_size >> SPA_BLKPTRSHIFT ; i ++ ) {
@@ -5281,6 +5349,7 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
52815349 }
52825350 }
52835351 }
5352+ rw_exit (& db -> db_rwlock );
52845353}
52855354
52865355
0 commit comments