Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion poseidon-analysis-hs.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ extra-source-files: README.md,
library
exposed-modules: Poseidon.Analysis.FStatsConfig, Poseidon.Analysis.RASconfig, Poseidon.Analysis.Utils,
Poseidon.Analysis.CLI.FStats, Poseidon.Analysis.CLI.RAS,
Poseidon.Generator.CLI.AdmixPops, Poseidon.Generator.CLI.SpaceTime,
Poseidon.Generator.CLI.AdmixPops,
Poseidon.Generator.Parsers, Poseidon.Generator.Types,
Poseidon.Generator.SampleGeno, Poseidon.Generator.Utils
hs-source-dirs: src
Expand Down
17 changes: 7 additions & 10 deletions src-executables/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ import Paths_poseidon_analysis_hs (version)
import Poseidon.CLI.OptparseApplicativeParsers
import Poseidon.PoseidonVersion (showPoseidonVersion,
validPoseidonVersions)
import Poseidon.Utils (LogMode (..),
import Poseidon.Utils (ErrorLength (..),
LogMode (..),
PlinkPopNameMode (..),
PoseidonException (..),
PoseidonIO, TestMode,
Expand Down Expand Up @@ -54,17 +55,12 @@ main = do
hPutStrLn stderr renderVersion
hPutStrLn stderr ""
(Options logMode testMode errLength plinkMode subcommand) <- OP.customExecParser (OP.prefs OP.showHelpOnEmpty) optParserInfo
catch (usePoseidonLogger logMode testMode plinkMode $ runCmd subcommand) (handler logMode testMode errLength plinkMode)
catch (usePoseidonLogger logMode testMode plinkMode errLength $ runCmd subcommand) (handler logMode testMode plinkMode errLength)
where
handler :: LogMode -> TestMode -> ErrorLength -> PlinkPopNameMode -> PoseidonException -> IO ()
handler l t len pm e = do
usePoseidonLogger l t pm $ logError $ truncateErr len $ renderPoseidonException e
handler :: LogMode -> TestMode -> PlinkPopNameMode -> ErrorLength -> PoseidonException -> IO ()
handler l t pm len e = do
usePoseidonLogger l t pm len . logError . renderPoseidonException $ e
exitFailure
truncateErr :: ErrorLength -> String -> String
truncateErr CharInf s = s
truncateErr (CharCount len) s
| length s > len = take len s ++ "... (see more with --errLength)"
| otherwise = s

runCmd :: Subcommand -> PoseidonIO ()
runCmd o = case o of
Expand Down Expand Up @@ -243,6 +239,7 @@ admixPopsOptParser = AdmixPopsOptions <$> parseGenoDataSources
<*> parseMaybeOutPackageName
<*> parseOutputPlinkPopMode
<*> parseOnlyLatest
<*> parseZipOut

parseIndWithAdmixtureSetDirect :: OP.Parser [RequestedInd]
parseIndWithAdmixtureSetDirect = OP.option (OP.eitherReader readIndWithAdmixtureSetString) (
Expand Down
10 changes: 5 additions & 5 deletions src/Poseidon/Analysis/CLI/FStats.hs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
getJointJanno,
readPoseidonPackageCollection)
import Poseidon.Utils (PoseidonException (..),
PoseidonIO, envInputPlinkMode,
PoseidonIO, envErrorLength,
envLogAction, logInfo,
logWithEnv)
import SequenceFormats.Eigenstrat (EigenstratSnpEntry (..),
Expand Down Expand Up @@ -134,11 +134,11 @@
logInfo "Computing stats:"
mapM_ (logInfo . summaryPrintFstats) statSpecs
logA <- envLogAction
inPlinkPopMode <- envInputPlinkMode
statsFold <- buildStatSpecsFold relevantPackages statSpecs
errLength <- envErrorLength

Check warning on line 138 in src/Poseidon/Analysis/CLI/FStats.hs

View check run for this annotation

Codecov / codecov/patch

src/Poseidon/Analysis/CLI/FStats.hs#L138

Added line #L138 was not covered by tests
blocks <- liftIO $ catch (
runSafeT $ do
(_, eigenstratProd) <- getJointGenotypeData logA False inPlinkPopMode relevantPackages Nothing
eigenstratProd <- getJointGenotypeData logA False relevantPackages Nothing

Check warning on line 141 in src/Poseidon/Analysis/CLI/FStats.hs

View check run for this annotation

Codecov / codecov/patch

src/Poseidon/Analysis/CLI/FStats.hs#L141

Added line #L141 was not covered by tests
let eigenstratProdFiltered =
eigenstratProd >->
P.filter chromFilter >->
Expand All @@ -148,7 +148,7 @@
JackknifePerN chunkSize -> chunkEigenstratByNrSnps chunkSize eigenstratProdFiltered
let summaryStatsProd = impurely foldsM statsFold eigenstratProdInChunks
purely P.fold list (summaryStatsProd >-> printBlockInfoPipe logA)
) (throwIO . PoseidonGenotypeExceptionForward)
) (throwIO . PoseidonGenotypeExceptionForward errLength)

Check warning on line 151 in src/Poseidon/Analysis/CLI/FStats.hs

View check run for this annotation

Codecov / codecov/patch

src/Poseidon/Analysis/CLI/FStats.hs#L151

Added line #L151 was not covered by tests
let jackknifeEstimates = processBlocks statSpecs blocks
let nrSitesList = [sum [(vals !! i) !! 1 | BlockData _ _ _ vals <- blocks] | i <- [0..(length statSpecs - 1)]]
let hasAscertainment = or $ do
Expand Down Expand Up @@ -238,7 +238,7 @@
ploidyVec <- makePloidyVec . getJointJanno $ packages
entityIndicesLookup <- do
let collectedSpecs = collectStatSpecGroups fStatSpecs
entityIndices <- sequence [resolveUniqueEntityIndices [s] indInfoCollection | s <- collectedSpecs]
entityIndices <- sequence [resolveUniqueEntityIndices False [s] indInfoCollection | s <- collectedSpecs]

Check warning on line 241 in src/Poseidon/Analysis/CLI/FStats.hs

View check run for this annotation

Codecov / codecov/patch

src/Poseidon/Analysis/CLI/FStats.hs#L241

Added line #L241 was not covered by tests
return . M.fromList . zip collectedSpecs $ entityIndices
blockAccum <- do
listOfInnerVectors <- forM fStatSpecs $ \(FStatSpec fType _ _) -> do
Expand Down
14 changes: 7 additions & 7 deletions src/Poseidon/Analysis/CLI/RAS.hs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ import Poseidon.Package (PackageReadOptions (..),
getJointJanno,
readPoseidonPackageCollection)
import Poseidon.Utils (PoseidonException (..),
PoseidonIO, envInputPlinkMode,
PoseidonIO, envErrorLength,
envLogAction, logInfo, logWithEnv)
import SequenceFormats.Bed (filterThroughBed, readBedFile)
import SequenceFormats.Eigenstrat (EigenstratSnpEntry (..),
Expand Down Expand Up @@ -137,10 +137,10 @@ runRAS rasOpts = do

-- run the fold and retrieve the block data needed for RAS computations and output
logA <- envLogAction
inPlinkPopMode <- envInputPlinkMode
errLength <- envErrorLength
blockData <- liftIO $ catch (
runSafeT $ do
(_, eigenstratProd) <- getJointGenotypeData logA False inPlinkPopMode relevantPackages Nothing
eigenstratProd <- getJointGenotypeData logA False relevantPackages Nothing
let eigenstratProdFiltered =
bedFilterFunc (eigenstratProd >->
P.filter (chromFilter (_rasExcludeChroms rasOpts)) >->
Expand All @@ -152,7 +152,7 @@ runRAS rasOpts = do
let summaryStatsProd = impurely foldsM rasFold eigenstratProdInChunks
logWithEnv logA . logInfo $ "performing counts"
purely P.fold list (summaryStatsProd >-> P.tee (P.map showBlockLogOutput >-> P.toHandle stderr))
) (throwIO . PoseidonGenotypeExceptionForward)
) (throwIO . PoseidonGenotypeExceptionForward errLength)

-- outputting and computing results
logInfo "collating results"
Expand Down Expand Up @@ -250,9 +250,9 @@ buildRasFold packages minFreq maxFreq maxM maybeOutgroup popLefts popRights = do
ploidyVec <- makePloidyVec . getJointJanno $ packages
outgroupI <- case maybeOutgroup of
Nothing -> return []
Just o -> resolveUniqueEntityIndices [o] indInfoCollection
leftI <- sequence [resolveUniqueEntityIndices [l] indInfoCollection | l <- popLefts]
rightI <- sequence [resolveUniqueEntityIndices [r] indInfoCollection | r <- popRights]
Just o -> resolveUniqueEntityIndices False [o] indInfoCollection
leftI <- sequence [resolveUniqueEntityIndices False [l] indInfoCollection | l <- popLefts]
rightI <- sequence [resolveUniqueEntityIndices False [r] indInfoCollection | r <- popRights]
let nL = length popLefts
nR = length popRights
let indivNames = map indInfoName (fst indInfoCollection)
Expand Down
10 changes: 6 additions & 4 deletions src/Poseidon/Analysis/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,19 @@ import Data.Aeson ((.:))
import Data.Aeson.Key (toString)
import Data.Aeson.KeyMap (toList)
import Data.Aeson.Types (Object, Parser)
import Data.Text (pack, unpack)
import qualified Data.Vector as V
import Pipes (Pipe, cat)
import qualified Pipes.Prelude as P
import Poseidon.ColumnTypes (GroupName (..))
import Poseidon.EntityTypes (IndividualInfo (..),
SignedEntitiesList,
indInfoConformsToEntitySpecs,
isLatestInCollection,
makePacNameAndVersion)
import Poseidon.Janno (JannoGenotypePloidy (..),
JannoList (..), JannoRow (..),
JannoRows (..), getJannoList)
JannoRow (..), JannoRows (..),
ListColumn (..))
import Poseidon.Package (PoseidonPackage (..),
getJannoRowsFromPac)
import Poseidon.Utils (PoseidonException (..), PoseidonIO,
Expand All @@ -45,13 +47,13 @@ addGroupDefs groupDefs pacs = do -- this loops through all input packages
isLatest <- isLatestInCollection pacs pac
let newJanno = JannoRows $ do -- this loops through the janno-file
jannoRow <- getJannoRowsFromPac pac
let oldGroupNames = (getJannoList . jGroupName) jannoRow
let oldGroupNames = [unpack gn | GroupName gn <- (getListColumn . jGroupName) jannoRow]
let additionalGroupNames = do -- this loops through each new group definition and returns those group names that apply to this janno-row
(groupName, signedEntityList) <- groupDefs
let indInfo = IndividualInfo (jPoseidonID jannoRow) oldGroupNames (makePacNameAndVersion pac)
True <- return $ indInfoConformsToEntitySpecs indInfo isLatest signedEntityList -- this checks whether a new group-def applies to this janno-row
return groupName -- only returns if the previous row pattern-matched, i.e. if the group applies
return $ jannoRow {jGroupName = JannoList (oldGroupNames ++ additionalGroupNames)} -- returns a new janno-row with the new group definitions
return $ jannoRow {jGroupName = ListColumn . map (GroupName . pack) $ (oldGroupNames ++ additionalGroupNames)} -- returns a new janno-row with the new group definitions
return $ pac {posPacJanno = newJanno} -- returns a new package with the new janno

parseGroupDefsFromJSON :: Object -> Parser [GroupDef]
Expand Down
47 changes: 29 additions & 18 deletions src/Poseidon/Generator/CLI/AdmixPops.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import Poseidon.Generator.Utils

import Control.Exception (catch, throwIO)
import Control.Monad (forM, unless, when)
import qualified Data.ByteString.Char8 as B
import Data.Function ((&))
import Data.List
import Data.Maybe
Expand Down Expand Up @@ -40,11 +41,12 @@ data AdmixPopsOptions = AdmixPopsOptions {
, _admixIndWithAdmixtureSet :: [RequestedInd]
, _admixIndWithAdmixtureSetFile :: Maybe FilePath
, _admixMethodSettings :: AdmixPopsMethodSettings
, _admixOutFormat :: GenotypeFormatSpec
, _admixOutFormat :: GenotypeOutFormatSpec
, _admixOutPath :: FilePath
, _admixOutPacName :: Maybe String
, _admixOutputPlinkPopMode :: PlinkPopNameMode
, _admixOnlyLatest :: Bool
, _admixOutZip :: Bool
}

runAdmixPops :: AdmixPopsOptions -> PoseidonIO ()
Expand All @@ -59,6 +61,7 @@ runAdmixPops (
maybeOutName
outPlinkPopMode
onlyLatest
outZip
) = do
let pacReadOpts = defaultPackageReadOptions {
_readOptIgnoreChecksums = True
Expand All @@ -76,16 +79,16 @@ runAdmixPops (
logInfo $ "Individuals: " ++ renderRequestedInds requestedInds
liftIO $ checkIndsWithAdmixtureSets requestedInds
-- load Poseidon packages
properPackages <- readPoseidonPackageCollection pacReadOpts $ [getPacBaseDirs x | x@PacBaseDir {} <- genoSources]
properPackages <- readPoseidonPackageCollection pacReadOpts $ [getPacBaseDir x | x@PacBaseDir {} <- genoSources]
pseudoPackages <- mapM makePseudoPackageFromGenotypeData $ [getGenoDirect x | x@GenoDirect {} <- genoSources]
logInfo $ "Unpackaged genotype data files loaded: " ++ show (length pseudoPackages)
let allPackages = properPackages ++ pseudoPackages
-- determine relevant packages
let popNames = concat $ map (map _inPopName) $ map _inPopSet requestedInds
let popNames = concatMap (map _inPopName . _inPopSet) requestedInds
relevantPackages <- filterPackagesByPops popNames allPackages
-- gather additional info for requested inds
preparedInds <- mapM (`gatherInfoForInd` relevantPackages) requestedInds
-- create new package --
-- create new package
let outName = case maybeOutName of -- take basename of outPath, if name is not provided
Just x -> x
Nothing -> takeBaseName outPath
Expand All @@ -94,24 +97,32 @@ runAdmixPops (
logInfo $ "Writing to directory (will be created if missing): " ++ outPath
liftIO $ createDirectoryIfMissing True outPath
-- compile genotype data structure
let (outInd, outSnp, outGeno) = case outFormat of
GenotypeFormatEigenstrat -> (outName <.> ".ind", outName <.> ".snp", outName <.> ".geno")
GenotypeFormatPlink -> (outName <.> ".fam", outName <.> ".bim", outName <.> ".bed")
let genotypeData = GenotypeDataSpec outFormat outGeno Nothing outSnp Nothing outInd Nothing Nothing
pac = newMinimalPackageTemplate outPath outName genotypeData
let gz = if outZip then "gz" else ""
gFileSpec = case outFormat of
GenotypeOutFormatEigenstrat -> GenotypeEigenstrat (outName <.> ".geno" <.> gz) Nothing (outName <.> ".snp" <.> gz) Nothing (outName <.> ".ind") Nothing
GenotypeOutFormatPlink -> GenotypePlink (outName <.> ".bed" <.> gz) Nothing (outName <.> ".bim" <.> gz) Nothing (outName <.> ".fam") Nothing
GenotypeOutFormatVCF -> GenotypeVCF (outName <.> ".vcf" <.> gz) Nothing
let genotypeData = GenotypeDataSpec gFileSpec Nothing
pac <- newMinimalPackageTemplate outPath outName genotypeData
liftIO $ writePoseidonPackage pac
-- compile genotype data
logInfo "Compiling individuals"
logA <- envLogAction
currentTime <- liftIO getCurrentTime
errLength <- envErrorLength
liftIO $ catch (
runSafeT $ do
(_, eigenstratProd) <- getJointGenotypeData logA False outPlinkPopMode relevantPackages Nothing
let (outG, outS, outI) = (outPath </> outGeno, outPath </> outSnp, outPath </> outInd)
let newIndEntries = map (\x -> EigenstratIndEntry (_indName x) Unknown (_groupName x)) preparedInds
eigenstratProd <- getJointGenotypeData logA False relevantPackages Nothing
let (outG, outS, outI) = case gFileSpec of
GenotypeEigenstrat outGeno _ outSnp _ outInd _ -> (outPath </> outGeno, outPath </> outSnp, outPath </> outInd)
GenotypePlink outGeno _ outSnp _ outInd _ -> (outPath </> outGeno, outPath </> outSnp, outPath </> outInd)
GenotypeVCF outGeno _ -> (outPath </> outGeno, "", "")
let newIndEntries = map (\x -> EigenstratIndEntry (B.pack . _indName $ x) Unknown (B.pack . _groupName $ x)) preparedInds
jannoRows = getJannoRows $ createMinimalJanno newIndEntries
let outConsumer = case outFormat of
GenotypeFormatEigenstrat -> writeEigenstrat outG outS outI newIndEntries
GenotypeFormatPlink -> writePlink outG outS outI (map (eigenstratInd2PlinkFam outPlinkPopMode) newIndEntries)
GenotypeOutFormatEigenstrat -> writeEigenstrat outG outS outI newIndEntries
GenotypeOutFormatPlink -> writePlink outG outS outI (map (eigenstratInd2PlinkFam outPlinkPopMode) newIndEntries)
GenotypeOutFormatVCF -> writeVCF logA jannoRows outG
case methodSetting of
PerSNP marginalizeMissing -> do
runEffect $ eigenstratProd >->
Expand All @@ -127,7 +138,7 @@ runAdmixPops (
) >->
printSNPCopyProgress logA currentTime >->
outConsumer
) (\e -> throwIO $ PoseidonGenotypeExceptionForward e)
) (throwIO . PoseidonGenotypeExceptionForward errLength)
logInfo "Done"
where
chunkEigenstratByNrSnps chunkSize = view (PG.chunksOf chunkSize)
Expand Down Expand Up @@ -155,7 +166,7 @@ filterPackagesByPops :: [String] -> [PoseidonPackage] -> PoseidonIO [PoseidonPac
filterPackagesByPops pops packages = do
fmap catMaybes . forM packages $ \pac -> do
inds <- loadIndividuals (posPacBaseDir pac) (posPacGenotypeData pac)
let groupNamesPac = [groupName | EigenstratIndEntry _ _ groupName <- inds]
let groupNamesPac = [B.unpack groupName | EigenstratIndEntry _ _ groupName <- inds]
if not (null (groupNamesPac `intersect` pops))
then return (Just pac)
else return Nothing
Expand All @@ -169,8 +180,8 @@ extractIndsPerPop :: PopFrac -> [PoseidonPackage] -> PoseidonIO PopFracConcrete
extractIndsPerPop (PopFrac pop_ frac_) relevantPackages = do
let allPackageNames = map getPacName relevantPackages
allIndEntries <- mapM (\pac -> loadIndividuals (posPacBaseDir pac) (posPacGenotypeData pac)) relevantPackages
let filterFunc (_,_,EigenstratIndEntry _ _ _group) = _group == pop_
indNames = map extractIndName $ filter filterFunc (zipGroup allPackageNames allIndEntries)
let filterFunc (_,_,EigenstratIndEntry _ _ _group) = B.unpack _group == pop_
indNames = map (B.unpack . extractIndName) $ filter filterFunc (zipGroup allPackageNames allIndEntries)
indIDs = map extractFirst $ filter filterFunc (zipGroup allPackageNames allIndEntries)
return (PopFracConcrete pop_ frac_ (zip indNames indIDs))
where
Expand Down
Loading