Global alpha-beta take 1.

portnov · portnov · commit 773aea7b6ce6 · 2019-02-16T12:48:59.000+05:00
refs #20.
diff --git a/src/AI/AlphaBeta.hs b/src/AI/AlphaBeta.hs
@@ -76,17 +76,18 @@ scoreMove (ScoreMoveInput {..}) = do
      let AlphaBeta params rules eval = smiAi
      score <- Monitoring.timed "ai.score.move" $ do
                 let board' = applyMoveActions (pmResult smiMove) smiBoard
-                score <- doScore rules eval smiCache params smiGameId (opposite smiSide) smiDepth board' smiAlpha smiBeta
+                score <- doScore rules eval smiCache params smiGameId (opposite smiSide) smiDepth board' smiGlobalInterval smiAlpha smiBeta
                           `catchError` (\(e :: Error) -> do
                                         $info "doScore: move {}, depth {}: {}" (show smiMove, dpTarget smiDepth, show e)
                                         throwError e
                                   )
                 $info "Check: {} (depth {}) => {}" (show smiMove, dpTarget smiDepth, show score)
                 return score
      
+     -- restrictInterval smiGlobalInterval smiSide score
      return (smiMove, score)
 
-type DepthIterationInput = (AlphaBetaParams, [PossibleMove], Maybe DepthIterationOutput)
+type DepthIterationInput = (AlphaBetaParams, TVar (Score, Score), [PossibleMove], Maybe DepthIterationOutput)
 type DepthIterationOutput = [(PossibleMove, Score)]
 type AiOutput = ([PossibleMove], Score)
 
@@ -232,17 +233,18 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
 --           return result
 
     depthDriver :: [PossibleMove] -> Checkers DepthIterationOutput
-    depthDriver moves =
+    depthDriver moves = do
+      globalInterval <- liftIO $ atomically $ newTVar (loose, win)
       case abBaseTime params of
         Nothing -> do
-          (result, _) <- go (params, moves, Nothing)
+          (result, _) <- go (params, globalInterval, moves, Nothing)
           return result
-        Just time -> repeatTimed' "runAI" time goTimed (params, moves, Nothing)
+        Just time -> repeatTimed' "runAI" time goTimed (params, globalInterval, moves, Nothing)
   
     goTimed :: DepthIterationInput
             -> Checkers (DepthIterationOutput, Maybe DepthIterationInput)
-    goTimed (params, moves, prevResult) = do
-      ret <- tryC $ go (params, moves, prevResult)
+    goTimed (params, globalInterval, moves, prevResult) = do
+      ret <- tryC $ go (params, globalInterval, moves, prevResult)
       case ret of
         Right result -> return result
         Left TimeExhaused ->
@@ -253,7 +255,7 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
 
     go :: DepthIterationInput
             -> Checkers (DepthIterationOutput, Maybe DepthIterationInput)
-    go (params, moves, prevResult) = do
+    go (params, globalInterval, moves, prevResult) = do
       let depth = abDepth params
       if length moves <= 1 -- Just one move possible
         then do
@@ -276,14 +278,14 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
                                     dpTarget = min (dpMax dp) (dpTarget dp + 1)
                                   }
                 | otherwise = dp
-          result <- widthController True True prevResult moves dp' =<< initInterval
+          result <- widthController True True prevResult moves dp' globalInterval =<< initInterval
           -- In some corner cases, there might be 1 or 2 possible moves,
           -- so the timeout would allow us to calculate with very big depth;
           -- too big depth does not decide anything in such situations.
           if depth < 50
             then do
               let params' = params {abDepth = depth + 1, abStartDepth = Nothing}
-              return (result, Just (params', moves, Just result))
+              return (result, Just (params', globalInterval, moves, Just result))
             else return (result, Nothing)
 
     score0 = evalBoard eval First board
@@ -317,22 +319,22 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
       | s > 100 = 5
       | otherwise = 2
 
-    nextInterval :: (Score, Score) -> (Score, Score)
-    nextInterval (alpha, beta) =
+    nextInterval :: Score -> (Score, Score) -> (Score, Score)
+    nextInterval good (alpha, beta) =
       let width = (beta - alpha)
           width' = selectScale width `scaleScore` width
-          alpha' = prevScore alpha
-          beta'  = nextScore beta
+          alpha' = min good (prevScore alpha)
+          beta'  = max good (nextScore beta)
       in  if maximize
             then (beta', max beta' (beta' + width'))
             else (min alpha' (alpha' - width'), alpha')
 
-    prevInterval :: (Score, Score) -> (Score, Score)
-    prevInterval (alpha, beta) =
+    prevInterval :: Score -> (Score, Score) -> (Score, Score)
+    prevInterval bad (alpha, beta) =
       let width = (beta - alpha)
           width' = selectScale width `scaleScore` width
-          alpha' = prevScore alpha
-          beta'  = nextScore beta
+          alpha' = min bad (prevScore alpha)
+          beta'  = max bad (nextScore beta)
       in  if minimize
             then (beta', max beta' (beta' + width'))
             else (min alpha' (alpha' - width'), alpha')
@@ -342,24 +344,26 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
                     -> Maybe DepthIterationOutput -- ^ Results of previous depth iteration
                     -> [PossibleMove]
                     -> DepthParams
+                    -> TVar (Score, Score) -- ^ Global (alpha, beta)
                     -> (Score, Score) -- ^ (Alpha, Beta)
                     -> Checkers DepthIterationOutput
-    widthController allowNext allowPrev prevResult moves dp interval@(alpha,beta) =
+    widthController allowNext allowPrev prevResult moves dp globalInterval localInterval = do
+      interval@(alpha, beta) <- getRestrictedInterval globalInterval localInterval
       if alpha == beta
         then do
           $info "Empty scores interval: [{}]. We have to think that all moves have this score." (Single alpha)
           return [(move, alpha) | move <- moves]
         else do
-            results <- widthIteration prevResult moves dp interval
-            let (good, badScore, badMoves) = selectBestEdge interval moves results
+            results <- widthIteration prevResult moves dp globalInterval interval
+            let (goodScore, good, badScore, badMoves) = selectBestEdge interval moves results
                 (bestMoves, bestResults) = unzip good
             if length badMoves == length moves
               then
                 if allowPrev
                   then do
-                    let interval' = prevInterval interval
+                    let interval' = prevInterval badScore interval
                     $info "All moves are `too bad'; consider worse scores interval: [{} - {}]" interval'
-                    widthController False True prevResult badMoves dp interval'
+                    widthController False True prevResult badMoves dp globalInterval interval'
                   else do
                     $info "All moves are `too bad' ({}), but we have already checked worse interval; so this is the real score." (Single badScore)
                     return [(move, badScore) | move <- moves]
@@ -372,15 +376,19 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
                   _ ->
                     if allowNext
                       then do
-                        let interval'@(alpha',beta') = nextInterval interval
+                        let interval'@(alpha',beta') = nextInterval goodScore interval
                         $info "Some moves ({} of them) are `too good'; consider better scores interval: [{} - {}]" (length bestMoves, alpha', beta')
-                        widthController True False prevResult bestMoves dp interval'
+                        widthController True False prevResult bestMoves dp globalInterval interval'
                       else do
                         $info  "Some moves ({} of them) are `too good'; but we have already checked better interval; so this is the real score" (Single $ length bestMoves)
                         return bestResults
 
-    scoreMoves :: [PossibleMove] -> DepthParams -> (Score, Score) -> Checkers [Either Error (PossibleMove, Score)]
-    scoreMoves moves dp (alpha, beta) = do
+    scoreMoves :: [PossibleMove]
+               -> DepthParams
+               -> TVar (Score, Score) -- ^ Global interval
+               -> (Score, Score)      -- ^ Local interval
+               -> Checkers [Either Error (PossibleMove, Score)]
+    scoreMoves moves dp globalInterval (localAlpha, localBeta) = do
       let var = aichData handle
       let processor = aichProcessor handle
       let inputs = [
@@ -392,22 +400,33 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
               smiDepth = dp,
               smiBoard = board,
               smiMove = move,
-              smiAlpha = alpha,
-              smiBeta = beta
+              smiGlobalInterval = globalInterval,
+              smiAlpha = localAlpha,
+              smiBeta = localBeta
             } | move <- moves ]
       process' processor inputs
     
-    scoreMoves' :: [PossibleMove] -> DepthParams -> (Score, Score) -> Checkers DepthIterationOutput
-    scoreMoves' moves dp (alpha, beta) = do
-      results <- scoreMoves moves dp (alpha, beta)
+    scoreMoves' :: [PossibleMove]
+                -> DepthParams
+                -> TVar (Score, Score)
+                -> (Score, Score)
+                -> Checkers DepthIterationOutput
+    scoreMoves' moves dp globalInterval localInterval = do
+      results <- scoreMoves moves dp globalInterval localInterval
       case sequence results of
         Right result -> return result
         Left err -> throwError err
 
-    widthIteration :: Maybe DepthIterationOutput -> [PossibleMove] -> DepthParams -> (Score, Score) -> Checkers DepthIterationOutput
-    widthIteration prevResult moves dp (alpha, beta) = do
+    widthIteration :: Maybe DepthIterationOutput
+                  -> [PossibleMove]
+                  -> DepthParams
+                  -> TVar (Score, Score)
+                  -> (Score, Score)
+                  -> Checkers DepthIterationOutput
+    widthIteration prevResult moves dp globalInterval localInterval = do
+      (alpha, beta) <- getRestrictedInterval globalInterval localInterval
       $info "`- Considering scores interval: [{} - {}], depth = {}" (alpha, beta, dpTarget dp)
-      results <- scoreMoves moves dp (alpha, beta)
+      results <- scoreMoves moves dp globalInterval (alpha, beta)
       joinResults prevResult results
 
     joinResults :: Maybe DepthIterationOutput -> [Either Error (PossibleMove, Score)] -> Checkers DepthIterationOutput
@@ -426,9 +445,16 @@ runAI ai@(AlphaBeta params rules eval) handle gameId side board = do
 
     selectBestEdge (alpha, beta) moves results =
       let (good, bad) = if maximize then (beta, alpha) else (alpha, beta)
-          goodResults = [(move, (goodMoves, score)) | (move, (goodMoves, score)) <- zip moves results, score == good]
-          badResults = [move | (move, (_, score)) <- zip moves results, score == bad]
-      in  (goodResults, bad, badResults)
+          goodResults = [(move, (goodMoves, score)) | (move, (goodMoves, score)) <- zip moves results, score >= good]
+          badResults = [move | (move, (_, score)) <- zip moves results, score <= bad]
+          scores = map snd results
+          badScore = if maximize
+                       then minimum scores
+                       else maximum scores
+          goodScore = if maximize
+                        then maximum scores
+                        else minimum scores
+      in  (goodScore, goodResults, bad, badResults)
 
     select :: DepthIterationOutput -> Checkers AiOutput
     select pairs = do
@@ -447,10 +473,11 @@ doScore :: (GameRules rules, Evaluator eval)
         -> Side
         -> DepthParams
         -> Board
+        -> TVar (Score, Score)
         -> Score -- ^ Alpha
         -> Score -- ^ Beta
         -> Checkers Score
-doScore rules eval var params gameId side dp board alpha beta = do
+doScore rules eval var params gameId side dp board globalInterval alpha beta = do
     initState <- mkInitState
     out <- evalStateT (cachedScoreAB var params input) initState
     return $ soScore out
@@ -461,13 +488,14 @@ doScore rules eval var params gameId side dp board alpha beta = do
       let timeout = case abBaseTime params of
                       Nothing -> Nothing
                       Just sec -> Just $ TimeSpec (fromIntegral sec) 0
-      return $ ScoreState rules eval gameId [loose] now timeout
+      return $ ScoreState rules eval gameId globalInterval [loose] now timeout
 
 -- | State of ScoreM monad.
 data ScoreState rules eval = ScoreState {
     ssRules :: rules
   , ssEvaluator :: eval
   , ssGameId :: GameId
+  , ssGlobalInterval :: TVar (Score, Score)
   , ssBestScores :: [Score] -- ^ At each level of depth-first search, there is own "best score"
   , ssStartTime :: TimeSpec -- ^ Start time of calculation
   , ssTimeout :: Maybe TimeSpec -- ^ Nothing for "no timeout"
@@ -518,6 +546,28 @@ clamp alpha beta score
   | score > beta  = beta
   | otherwise = score
 
+restrictInterval :: MonadIO m => TVar (Score, Score) -> Side -> Score -> m ()
+restrictInterval var side score = liftIO $ atomically $ do
+  (globalAlpha, globalBeta) <- readTVar var
+  when (globalAlpha < score && score < globalBeta) $
+    if side == First -- maximize
+      then writeTVar var (score, globalBeta)
+      else writeTVar var (globalAlpha, score)
+
+getRestrictedInterval :: (MonadIO m, HasLogger m, HasLogContext m) => TVar (Score, Score) -> (Score, Score) -> m (Score, Score)
+getRestrictedInterval global (localAlpha, localBeta) = do
+  (globalAlpha, globalBeta) <- liftIO $ atomically $ readTVar global
+  let alpha1 = max globalAlpha localAlpha
+      beta1  = min globalBeta  localBeta
+  if  alpha1 <= beta1
+    then do
+         $trace "Restrict: Global [{}, {}] x Local [{}, {}] => [{}, {}]"
+                  (globalAlpha, globalBeta, localAlpha, localBeta, alpha1, beta1)
+         return (alpha1, beta1)
+    else do
+         let mid = (alpha1 + beta1) `divideScore` 2
+         return (mid, mid)
+
 -- | Calculate score of the board. 
 -- This uses the cache. It is called in the recursive call also.
 cachedScoreAB :: forall rules eval. (GameRules rules, Evaluator eval)
@@ -541,7 +591,7 @@ cachedScoreAB var params input = do
                   -- AB-section: alpha <= result <= beta. So here we clamp the value
                   -- that we got from cache.
                   case itemBound item of
-                    Exact -> return $ Just $ ScoreOutput (clamp alpha beta score) False
+                    Exact -> return $ Just $ ScoreOutput score False
                     Alpha -> if score <= alpha
                                then return $ Just $ ScoreOutput alpha False
                                else return Nothing
@@ -632,36 +682,49 @@ scoreAB var params input
       -- target depth is achieved, calculate score of current board directly
       evaluator <- gets ssEvaluator
       let score0 = evalBoard' evaluator board
+      (alpha, beta) <- getRestrictedInterval'
       $trace "    X Side: {}, A = {}, B = {}, score0 = {}" (show side, show alpha, show beta, show score0)
       quiescene <- checkQuiescene
       return $ ScoreOutput score0 quiescene
   | otherwise = do
       -- first, let "best" be the worse possible value
-      let best = if maximize then alpha else beta -- we assume alpha <= beta
-      push best
-      $trace "{}V Side: {}, A = {}, B = {}" (indent, show side, show alpha, show beta)
-      rules <- gets ssRules
-      moves <- lift $ getPossibleMoves var side board
-
-      -- this actually means that corresponding side lost.
-      when (null moves) $
-        $trace "{}`—No moves left." (Single indent)
-
-      dp' <- updateDepth params moves dp
-      let prevMove = siPrevMove input
-      moves' <- sortMoves prevMove moves
-      out <- iterateMoves (zip [1..] moves') dp'
-      pop
-      return out
+      let best = if maximize then loose else win -- we assume alpha <= beta
+      (alpha, beta) <- getRestrictedInterval'
+      if alpha == beta
+        then do
+            quiescene <- checkQuiescene
+            return $ ScoreOutput best quiescene
+        else do
+          push best
+          $trace "{}V Side: {}, A = {}, B = {}" (indent, show side, show alpha, show beta)
+          rules <- gets ssRules
+          moves <- lift $ getPossibleMoves var side board
+
+          -- this actually means that corresponding side lost.
+          when (null moves) $
+            $trace "{}`—No moves left." (Single indent)
+
+          dp' <- updateDepth params moves dp
+          let prevMove = siPrevMove input
+          moves' <- sortMoves prevMove moves
+          out <- iterateMoves (zip [1..] moves') dp'
+          pop
+          return out
 
   where
 
     side = siSide input
     dp = siDepth input
-    alpha = siAlpha input
-    beta = siBeta input
+    localAlpha = siAlpha input
+    localBeta = siBeta input
     board = siBoard input
 
+    getRestrictedInterval' = do
+      globalInterval <- gets ssGlobalInterval
+      result@(alpha, beta) <- getRestrictedInterval globalInterval (localAlpha, localBeta)
+      return result
+
+
     evalBoard' :: eval -> Board -> Score
     evalBoard' evaluator board = result
       where
@@ -776,6 +839,7 @@ scoreAB var params input
         go (input : inputs) = do
           out <- cachedScoreAB var params input
           let score = soScore out
+          (alpha, beta) <- getRestrictedInterval'
           if maximize && score >= beta || minimize && score <= alpha
             then go inputs
             else return out
@@ -796,6 +860,7 @@ scoreAB var params input
       evaluator <- gets ssEvaluator
       rules <- gets ssRules
       best <- getBest
+      let (alpha, beta) = (localAlpha, localBeta)
       let input' = input {
                       siSide = opposite side
                     , siAlpha = if maximize
diff --git a/src/AI/AlphaBeta/Types.hs b/src/AI/AlphaBeta/Types.hs
@@ -149,6 +149,7 @@ data ScoreMoveInput rules eval = ScoreMoveInput {
     smiAi :: AlphaBeta rules eval
   , smiCache :: AICacheHandle rules eval
   , smiGameId :: GameId
+  , smiGlobalInterval :: TVar (Score, Score)
   , smiSide :: Side 
   , smiDepth :: DepthParams
   , smiBoard :: Board