@@ -90,14 +90,15 @@ scoreMove (ScoreMoveInput {..}) = do
90
90
let AlphaBeta params rules eval = smiAi
91
91
score <- Monitoring. timed " ai.score.move" $ do
92
92
let board' = applyMoveActions (pmResult smiMove) smiBoard
93
- score <- doScore rules eval smiCache params smiGameId (opposite smiSide) smiDepth board' smiAlpha smiBeta
93
+ score <- doScore rules eval smiCache params smiGameId (opposite smiSide) smiDepth board' smiGlobalInterval smiAlpha smiBeta
94
94
`catchError` (\ (e :: Error ) -> do
95
95
$ info " doScore: move {}, depth {}: {}" (show smiMove, dpTarget smiDepth, show e)
96
96
throwError e
97
97
)
98
98
$ info " Check: {} ([{} - {}], depth {}) => {}" (show smiMove, show smiAlpha, show smiBeta, dpTarget smiDepth, show score)
99
99
return score
100
100
101
+ restrictInterval smiGlobalInterval smiSide score
101
102
return (smiMove, score)
102
103
103
104
scoreMoveGroup :: (GameRules rules , Evaluator eval ) => [ScoreMoveInput rules eval ] -> Checkers [(PossibleMove , Score )]
@@ -373,7 +374,8 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
373
374
, dpReductedMode = False
374
375
}
375
376
$ info " Preselecting; number of possible moves = {}, depth = {}" (length moves, dpTarget simple)
376
- options <- scoreMoves' False moves simple (loose, win)
377
+ globalInterval <- liftIO $ atomically $ newTVar (loose, win)
378
+ options <- scoreMoves' False moves simple globalInterval (loose, win)
377
379
let key = if maximize
378
380
then negate . snd
379
381
else snd
@@ -385,14 +387,16 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
385
387
-- return result
386
388
387
389
depthDriver :: [PossibleMove ] -> Checkers DepthIterationOutput
388
- depthDriver moves =
390
+ depthDriver moves = do
391
+ globalInterval <- liftIO $ atomically $ newTVar (loose, win)
389
392
let input = DepthIterationInput {
390
393
diiParams = params,
391
394
diiMoves = moves,
392
395
diiPrevResult = Nothing ,
396
+ diiGlobalInterval = globalInterval,
393
397
diiSortKeys = Nothing
394
398
}
395
- in case abBaseTime params of
399
+ case abBaseTime params of
396
400
Nothing -> do
397
401
(result, _) <- go input
398
402
return result
@@ -445,7 +449,7 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
445
449
Just keys -> return keys
446
450
Nothing -> preselect preselectDepth diiMoves
447
451
let sortedMoves = map snd $ sortOn fst $ zip sortKeys diiMoves
448
- result <- widthController True True diiPrevResult sortedMoves dp' =<< initInterval
452
+ result <- widthController True True diiPrevResult sortedMoves dp' diiGlobalInterval =<< initInterval
449
453
-- In some corner cases, there might be 1 or 2 possible moves,
450
454
-- so the timeout would allow us to calculate with very big depth;
451
455
-- too big depth does not decide anything in such situations.
@@ -493,22 +497,22 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
493
497
| s > 100 = 5
494
498
| otherwise = 2
495
499
496
- nextInterval :: (Score , Score ) -> (Score , Score )
497
- nextInterval (alpha, beta) =
500
+ nextInterval :: Score -> (Score , Score ) -> (Score , Score )
501
+ nextInterval good (alpha, beta) =
498
502
let width = (beta - alpha)
499
503
width' = selectScale width `scaleScore` width
500
- alpha' = prevScore alpha
501
- beta' = nextScore beta
504
+ alpha' = min good ( prevScore alpha)
505
+ beta' = max good ( nextScore beta)
502
506
in if maximize
503
507
then (beta', max beta' (beta' + width'))
504
508
else (min alpha' (alpha' - width'), alpha')
505
509
506
- prevInterval :: (Score , Score ) -> (Score , Score )
507
- prevInterval (alpha, beta) =
510
+ prevInterval :: Score -> (Score , Score ) -> (Score , Score )
511
+ prevInterval bad (alpha, beta) =
508
512
let width = (beta - alpha)
509
513
width' = selectScale width `scaleScore` width
510
- alpha' = prevScore alpha
511
- beta' = nextScore beta
514
+ alpha' = min bad ( prevScore alpha)
515
+ beta' = max bad ( nextScore beta)
512
516
in if minimize
513
517
then (beta', max beta' (beta' + width'))
514
518
else (min alpha' (alpha' - width'), alpha')
@@ -518,24 +522,26 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
518
522
-> Maybe DepthIterationOutput -- ^ Results of previous depth iteration
519
523
-> [PossibleMove ]
520
524
-> DepthParams
525
+ -> TVar (Score , Score ) -- ^ Global (alpha, beta)
521
526
-> (Score , Score ) -- ^ (Alpha, Beta)
522
527
-> Checkers DepthIterationOutput
523
- widthController allowNext allowPrev prevResult moves dp interval@ (alpha,beta) =
528
+ widthController allowNext allowPrev prevResult moves dp globalInterval localInterval = do
529
+ interval@ (alpha, beta) <- getRestrictedInterval globalInterval localInterval
524
530
if alpha == beta
525
531
then do
526
532
$ info " Empty scores interval: [{}]. We have to think that all moves have this score." (Single alpha)
527
533
return [(move, alpha) | move <- moves]
528
534
else do
529
- results <- widthIteration prevResult moves dp interval
530
- let (good, badScore, badMoves) = selectBestEdge interval moves results
535
+ results <- widthIteration prevResult moves dp globalInterval interval
536
+ let (goodScore, good, badScore, badMoves) = selectBestEdge interval moves results
531
537
(bestMoves, bestResults) = unzip good
532
538
if length badMoves == length moves
533
539
then
534
540
if allowPrev
535
541
then do
536
- let interval' = prevInterval interval
542
+ let interval' = prevInterval badScore interval
537
543
$ info " All moves are `too bad'; consider worse scores interval: [{} - {}]" interval'
538
- widthController False True prevResult badMoves dp interval'
544
+ widthController False True prevResult badMoves dp globalInterval interval'
539
545
else do
540
546
$ info " All moves are `too bad' ({}), but we have already checked worse interval; so this is the real score." (Single badScore)
541
547
return [(move, badScore) | move <- moves]
@@ -548,15 +554,20 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
548
554
_ ->
549
555
if allowNext
550
556
then do
551
- let interval'@ (alpha',beta') = nextInterval interval
557
+ let interval'@ (alpha',beta') = nextInterval goodScore interval
552
558
$ info " Some moves ({} of them) are `too good'; consider better scores interval: [{} - {}]" (length bestMoves, alpha', beta')
553
- widthController True False prevResult bestMoves dp interval'
559
+ widthController True False prevResult bestMoves dp globalInterval interval'
554
560
else do
555
561
$ info " Some moves ({} of them) are `too good'; but we have already checked better interval; so this is the real score" (Single $ length bestMoves)
556
562
return bestResults
557
563
558
- scoreMoves :: Bool -> [PossibleMove ] -> DepthParams -> (Score , Score ) -> Checkers [Either Error (PossibleMove , Score )]
559
- scoreMoves byOne moves dp (alpha, beta) = do
564
+ scoreMoves :: Bool
565
+ -> [PossibleMove ]
566
+ -> DepthParams
567
+ -> TVar (Score , Score ) -- ^ Global interval
568
+ -> (Score , Score ) -- ^ Local interval
569
+ -> Checkers [Either Error (PossibleMove , Score )]
570
+ scoreMoves byOne moves dp globalInterval (localAlpha, localBeta) = do
560
571
let var = aichData handle
561
572
let processor = aichProcessor handle
562
573
let inputs = [
@@ -568,8 +579,9 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
568
579
smiDepth = dp,
569
580
smiBoard = board,
570
581
smiMove = move,
571
- smiAlpha = alpha,
572
- smiBeta = beta
582
+ smiGlobalInterval = globalInterval,
583
+ smiAlpha = localAlpha,
584
+ smiBeta = localBeta
573
585
} | move <- moves ]
574
586
575
587
n = length moves
@@ -580,18 +592,29 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
580
592
581
593
results <- process' processor groups
582
594
return $ concatE (map length groups) results
583
-
584
- scoreMoves' :: Bool -> [PossibleMove ] -> DepthParams -> (Score , Score ) -> Checkers DepthIterationOutput
585
- scoreMoves' byOne moves dp (alpha, beta) = do
586
- results <- scoreMoves byOne moves dp (alpha, beta)
595
+
596
+ scoreMoves' :: Bool
597
+ -> [PossibleMove ]
598
+ -> DepthParams
599
+ -> TVar (Score , Score )
600
+ -> (Score , Score )
601
+ -> Checkers DepthIterationOutput
602
+ scoreMoves' byOne moves dp globalInterval localInterval = do
603
+ results <- scoreMoves byOne moves dp globalInterval localInterval
587
604
case sequence results of
588
605
Right result -> return result
589
606
Left err -> throwError err
590
607
591
- widthIteration :: Maybe DepthIterationOutput -> [PossibleMove ] -> DepthParams -> (Score , Score ) -> Checkers DepthIterationOutput
592
- widthIteration prevResult moves dp (alpha, beta) = do
608
+ widthIteration :: Maybe DepthIterationOutput
609
+ -> [PossibleMove ]
610
+ -> DepthParams
611
+ -> TVar (Score , Score )
612
+ -> (Score , Score )
613
+ -> Checkers DepthIterationOutput
614
+ widthIteration prevResult moves dp globalInterval localInterval = do
615
+ (alpha, beta) <- getRestrictedInterval globalInterval localInterval
593
616
$ info " `- Considering scores interval: [{} - {}], depth = {}" (alpha, beta, dpTarget dp)
594
- results <- scoreMoves False moves dp (alpha, beta)
617
+ results <- scoreMoves False moves dp globalInterval (alpha, beta)
595
618
joinResults prevResult results
596
619
597
620
joinResults :: Maybe DepthIterationOutput -> [Either Error (PossibleMove , Score )] -> Checkers DepthIterationOutput
@@ -612,7 +635,14 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
612
635
let (good, bad) = if maximize then (beta, alpha) else (alpha, beta)
613
636
goodResults = [(move, (goodMoves, score)) | (move, (goodMoves, score)) <- zip moves results, score >= good]
614
637
badResults = [move | (move, (_, score)) <- zip moves results, score <= bad]
615
- in (goodResults, bad, badResults)
638
+ scores = map snd results
639
+ badScore = if maximize
640
+ then minimum scores
641
+ else maximum scores
642
+ goodScore = if maximize
643
+ then maximum scores
644
+ else minimum scores
645
+ in (goodScore, goodResults, bad, badResults)
616
646
617
647
select :: DepthIterationOutput -> Checkers AiOutput
618
648
select pairs = do
@@ -631,10 +661,11 @@ doScore :: (GameRules rules, Evaluator eval)
631
661
-> Side
632
662
-> DepthParams
633
663
-> Board
664
+ -> TVar (Score , Score )
634
665
-> Score -- ^ Alpha
635
666
-> Score -- ^ Beta
636
667
-> Checkers Score
637
- doScore rules eval var params gameId side dp board alpha beta = do
668
+ doScore rules eval var params gameId side dp board globalInterval alpha beta = do
638
669
initState <- mkInitState
639
670
out <- evalStateT (cachedScoreAB var params input) initState
640
671
return $ soScore out
@@ -645,14 +676,36 @@ doScore rules eval var params gameId side dp board alpha beta = do
645
676
let timeout = case abBaseTime params of
646
677
Nothing -> Nothing
647
678
Just sec -> Just $ TimeSpec (fromIntegral sec) 0
648
- return $ ScoreState rules eval gameId [loose] M. empty now timeout
679
+ return $ ScoreState rules eval gameId globalInterval [loose] M. empty now timeout
649
680
650
681
clamp :: Ord a => a -> a -> a -> a
651
682
clamp alpha beta score
652
683
| score < alpha = alpha
653
684
| score > beta = beta
654
685
| otherwise = score
655
686
687
+ restrictInterval :: MonadIO m => TVar (Score , Score ) -> Side -> Score -> m ()
688
+ restrictInterval var side score = liftIO $ atomically $ do
689
+ (globalAlpha, globalBeta) <- readTVar var
690
+ when (globalAlpha < score && score < globalBeta) $
691
+ if side == First -- maximize
692
+ then writeTVar var (score, globalBeta)
693
+ else writeTVar var (globalAlpha, score)
694
+
695
+ getRestrictedInterval :: (MonadIO m , HasLogger m , HasLogContext m ) => TVar (Score , Score ) -> (Score , Score ) -> m (Score , Score )
696
+ getRestrictedInterval global (localAlpha, localBeta) = do
697
+ (globalAlpha, globalBeta) <- liftIO $ atomically $ readTVar global
698
+ let alpha1 = max globalAlpha localAlpha
699
+ beta1 = min globalBeta localBeta
700
+ if alpha1 <= beta1
701
+ then do
702
+ $ trace " Restrict: Global [{}, {}] x Local [{}, {}] => [{}, {}]"
703
+ (globalAlpha, globalBeta, localAlpha, localBeta, alpha1, beta1)
704
+ return (alpha1, beta1)
705
+ else do
706
+ let mid = (alpha1 + beta1) `divideScore` 2
707
+ return (mid, mid)
708
+
656
709
-- | Calculate score of the board.
657
710
-- This uses the cache. It is called in the recursive call also.
658
711
cachedScoreAB :: forall rules eval . (GameRules rules , Evaluator eval )
@@ -676,7 +729,7 @@ cachedScoreAB var params input = do
676
729
-- AB-section: alpha <= result <= beta. So here we clamp the value
677
730
-- that we got from cache.
678
731
case itemBound item of
679
- Exact -> return $ Just $ ScoreOutput (clamp alpha beta score) False
732
+ Exact -> return $ Just $ ScoreOutput score False
680
733
Alpha -> if score <= alpha
681
734
then return $ Just $ ScoreOutput alpha False
682
735
else return Nothing
@@ -776,16 +829,12 @@ scoreAB :: forall rules eval. (GameRules rules, Evaluator eval)
776
829
-> ScoreInput
777
830
-> ScoreM rules eval ScoreOutput
778
831
scoreAB var params input
779
- | alpha == beta = do
780
- $ verbose " Alpha == Beta == {}, return it" (Single $ show alpha)
781
- quiescene <- checkQuiescene
782
- return $ ScoreOutput alpha quiescene
783
-
784
832
| isTargetDepth dp = do
785
833
-- target depth is achieved, calculate score of current board directly
786
834
evaluator <- gets ssEvaluator
787
835
let score0 = evalBoard' evaluator board
788
- $ verbose " X Side: {}, A = {}, B = {}, score0 = {}" (show side, show alpha, show beta, show score0)
836
+ (alpha, beta) <- getRestrictedInterval'
837
+ $ trace " X Side: {}, A = {}, B = {}, score0 = {}" (show side, show alpha, show beta, show score0)
789
838
quiescene <- checkQuiescene
790
839
return $ ScoreOutput score0 quiescene
791
840
@@ -799,6 +848,7 @@ scoreAB var params input
799
848
return out
800
849
Nothing -> do
801
850
851
+ (alpha, beta) <- getRestrictedInterval'
802
852
moves <- lift $ getPossibleMoves var side board
803
853
let quiescene = isQuiescene moves
804
854
let worst
@@ -811,7 +861,7 @@ scoreAB var params input
811
861
$ verbose " {}`—No moves left." (Single indent)
812
862
return $ ScoreOutput worst True
813
863
else
814
- if dpStaticMode dp && isQuiescene moves
864
+ if dpStaticMode dp && quiescene
815
865
-- In static mode, we are considering forced moves only.
816
866
-- If we have reached a quiescene, then that's all.
817
867
then do
@@ -822,25 +872,27 @@ scoreAB var params input
822
872
let best
823
873
| dpStaticMode dp = evalBoard' evaluator board
824
874
| otherwise = worst
825
-
826
- push best
827
- $ verbose " {}V Side: {}, A = {}, B = {}" (indent, show side, show alpha, show beta)
828
- rules <- gets ssRules
829
- dp' <- updateDepth params moves dp
830
- let prevMove = siPrevMove input
831
- moves' <- sortMoves params var side dp board prevMove moves
832
- let depths = correspondingDepths (length moves') score0 quiescene dp'
833
- -- let depths = repeat dp'
834
- out <- iterateMoves $ zip3 [1 .. ] moves' depths
835
- pop
836
- return out
875
+ if alpha == beta
876
+ then return $ ScoreOutput best quiescene
877
+ else do
878
+ push best
879
+ $ verbose " {}V Side: {}, A = {}, B = {}" (indent, show side, show alpha, show beta)
880
+ rules <- gets ssRules
881
+ dp' <- updateDepth params moves dp
882
+ let prevMove = siPrevMove input
883
+ moves' <- sortMoves params var side dp board prevMove moves
884
+ let depths = correspondingDepths (length moves') score0 quiescene dp'
885
+ -- let depths = repeat dp'
886
+ out <- iterateMoves $ zip3 [1 .. ] moves' depths
887
+ pop
888
+ return out
837
889
838
890
where
839
891
840
892
side = siSide input
841
893
dp = siDepth input
842
- alpha = siAlpha input
843
- beta = siBeta input
894
+ localAlpha = siAlpha input
895
+ localBeta = siBeta input
844
896
board = siBoard input
845
897
846
898
canReduceDepth :: Score -> Bool -> Bool
@@ -849,8 +901,8 @@ scoreAB var params input
849
901
not (dpReductedMode dp) &&
850
902
dpCurrent dp >= 4 &&
851
903
quiescene &&
852
- score0 > alpha &&
853
- score0 < beta &&
904
+ score0 > localAlpha &&
905
+ score0 < localBeta &&
854
906
score0 > - 10 &&
855
907
score0 < 10
856
908
@@ -868,6 +920,7 @@ scoreAB var params input
868
920
checkFutility = do
869
921
evaluator <- gets ssEvaluator
870
922
quiescene <- checkQuiescene
923
+ (alpha, beta) <- getRestrictedInterval'
871
924
let score0 = evalBoard' evaluator board
872
925
best = if maximize then alpha else beta
873
926
isBad = if maximize
@@ -883,6 +936,11 @@ scoreAB var params input
883
936
then return $ Just $ ScoreOutput score0 quiescene
884
937
else return Nothing
885
938
939
+ getRestrictedInterval' = do
940
+ globalInterval <- gets ssGlobalInterval
941
+ result@ (alpha, beta) <- getRestrictedInterval globalInterval (localAlpha, localBeta)
942
+ return result
943
+
886
944
evalBoard' :: eval -> Board -> Score
887
945
evalBoard' evaluator board = result
888
946
where
@@ -964,6 +1022,7 @@ scoreAB var params input
964
1022
go (input : inputs) = do
965
1023
out <- cachedScoreAB var params input
966
1024
let score = soScore out
1025
+ (alpha, beta) <- getRestrictedInterval'
967
1026
if maximize && score >= beta || minimize && score <= alpha
968
1027
then go inputs
969
1028
else return out
@@ -984,6 +1043,7 @@ scoreAB var params input
984
1043
evaluator <- gets ssEvaluator
985
1044
rules <- gets ssRules
986
1045
best <- getBest
1046
+ let (alpha, beta) = (localAlpha, localBeta)
987
1047
let input' = input {
988
1048
siSide = opposite side
989
1049
, siAlpha = if maximize
0 commit comments