@@ -411,10 +411,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
411411{
412412 const sector_t this_sector = r1_bio -> sector ;
413413 const int sectors = r1_bio -> sectors ;
414- int new_disk = -1 ;
415414 int start_disk ;
415+ int best_disk ;
416416 int i ;
417- sector_t new_distance , current_distance ;
417+ sector_t best_dist ;
418418 mdk_rdev_t * rdev ;
419419 int choose_first ;
420420
@@ -425,6 +425,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
425425 * We take the first readable disk when above the resync window.
426426 */
427427 retry :
428+ best_disk = -1 ;
429+ best_dist = MaxSector ;
428430 if (conf -> mddev -> recovery_cp < MaxSector &&
429431 (this_sector + sectors >= conf -> next_resync )) {
430432 choose_first = 1 ;
@@ -434,81 +436,64 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
434436 start_disk = conf -> last_used ;
435437 }
436438
437- /* make sure the disk is operational */
438439 for (i = 0 ; i < conf -> raid_disks ; i ++ ) {
440+ sector_t dist ;
439441 int disk = start_disk + i ;
440442 if (disk >= conf -> raid_disks )
441443 disk -= conf -> raid_disks ;
442444
443445 rdev = rcu_dereference (conf -> mirrors [disk ].rdev );
444446 if (r1_bio -> bios [disk ] == IO_BLOCKED
445447 || rdev == NULL
446- || ! test_bit (In_sync , & rdev -> flags ))
448+ || test_bit (Faulty , & rdev -> flags ))
447449 continue ;
448-
449- new_disk = disk ;
450- if (!test_bit (WriteMostly , & rdev -> flags ))
451- break ;
452- }
453-
454- if (new_disk < 0 || choose_first )
455- goto rb_out ;
456-
457- /*
458- * Don't change to another disk for sequential reads:
459- */
460- if (conf -> next_seq_sect == this_sector )
461- goto rb_out ;
462- if (this_sector == conf -> mirrors [new_disk ].head_position )
463- goto rb_out ;
464-
465- current_distance = abs (this_sector
466- - conf -> mirrors [new_disk ].head_position );
467-
468- /* look for a better disk - i.e. head is closer */
469- start_disk = new_disk ;
470- for (i = 1 ; i < conf -> raid_disks ; i ++ ) {
471- int disk = start_disk + 1 ;
472- if (disk >= conf -> raid_disks )
473- disk -= conf -> raid_disks ;
474-
475- rdev = rcu_dereference (conf -> mirrors [disk ].rdev );
476- if (r1_bio -> bios [disk ] == IO_BLOCKED
477- || rdev == NULL
478- || !test_bit (In_sync , & rdev -> flags )
479- || test_bit (WriteMostly , & rdev -> flags ))
450+ if (!test_bit (In_sync , & rdev -> flags ) &&
451+ rdev -> recovery_offset < this_sector + sectors )
480452 continue ;
481-
482- if (!atomic_read (& rdev -> nr_pending )) {
483- new_disk = disk ;
453+ if (test_bit (WriteMostly , & rdev -> flags )) {
454+ /* Don't balance among write-mostly, just
455+ * use the first as a last resort */
456+ if (best_disk < 0 )
457+ best_disk = disk ;
458+ continue ;
459+ }
460+ /* This is a reasonable device to use. It might
461+ * even be best.
462+ */
463+ dist = abs (this_sector - conf -> mirrors [disk ].head_position );
464+ if (choose_first
465+ /* Don't change to another disk for sequential reads */
466+ || conf -> next_seq_sect == this_sector
467+ || dist == 0
468+ /* If device is idle, use it */
469+ || atomic_read (& rdev -> nr_pending ) == 0 ) {
470+ best_disk = disk ;
484471 break ;
485472 }
486- new_distance = abs (this_sector - conf -> mirrors [disk ].head_position );
487- if (new_distance < current_distance ) {
488- current_distance = new_distance ;
489- new_disk = disk ;
473+ if (dist < best_dist ) {
474+ best_dist = dist ;
475+ best_disk = disk ;
490476 }
491477 }
492478
493- rb_out :
494- if (new_disk >= 0 ) {
495- rdev = rcu_dereference (conf -> mirrors [new_disk ].rdev );
479+ if (best_disk >= 0 ) {
480+ rdev = rcu_dereference (conf -> mirrors [best_disk ].rdev );
496481 if (!rdev )
497482 goto retry ;
498483 atomic_inc (& rdev -> nr_pending );
499- if (! test_bit (In_sync , & rdev -> flags )) {
484+ if (test_bit (Faulty , & rdev -> flags )) {
500485 /* cannot risk returning a device that failed
501486 * before we inc'ed nr_pending
502487 */
503488 rdev_dec_pending (rdev , conf -> mddev );
504489 goto retry ;
505490 }
506491 conf -> next_seq_sect = this_sector + sectors ;
507- conf -> last_used = new_disk ;
492+ conf -> last_used = best_disk ;
508493 }
509494 rcu_read_unlock ();
510495
511- return new_disk ;
496+ return best_disk ;
512497}
513498
514499static int raid1_congested (void * data , int bits )
0 commit comments