From 4b9ef74be4cf1a90991767ee9bc2ac4d0496b268 Mon Sep 17 00:00:00 2001 From: Lewis John McGibbney Date: Fri, 13 May 2016 20:33:20 -0700 Subject: [PATCH 1/6] THRAX-11 Publish thrax to Sonatype --- pom.xml | 372 ++++++++++++++++++ src/{ => main/java}/edu/jhu/thrax/Thrax.java | 0 .../thrax/datatypes/AlignedSentencePair.java | 0 .../edu/jhu/thrax/datatypes/Alignment.java | 0 .../jhu/thrax/datatypes/ArrayAlignment.java | 0 .../jhu/thrax/datatypes/HierarchicalRule.java | 0 .../edu/jhu/thrax/datatypes/IntPair.java | 0 .../edu/jhu/thrax/datatypes/PhrasePair.java | 0 .../thrax/distributional/ContextPhrase.java | 0 .../ContextPhraseExtractor.java | 0 .../thrax/distributional/FeatureClass.java | 0 .../thrax/distributional/FeatureEncoder.java | 0 .../jhu/thrax/distributional/FeatureSet.java | 0 .../thrax/distributional/FeatureTypes.java | 0 .../extraction/HierarchicalRuleExtractor.java | 0 .../jhu/thrax/extraction/HieroLabeler.java | 0 .../edu/jhu/thrax/extraction/LabelCache.java | 0 .../edu/jhu/thrax/extraction/Labeling.java | 0 .../thrax/extraction/ManualSpanLabeler.java | 0 .../edu/jhu/thrax/extraction/SAMTLabeler.java | 0 .../edu/jhu/thrax/extraction/SpanLabeler.java | 0 .../hadoop/comparators/FieldComparator.java | 0 .../PrimitiveArrayMarginalComparator.java | 0 .../comparators/TextMarginalComparator.java | 0 .../hadoop/datatypes/AlignedRuleWritable.java | 0 .../hadoop/datatypes/AlignmentWritable.java | 0 .../thrax/hadoop/datatypes/Annotation.java | 0 .../thrax/hadoop/datatypes/FeatureMap.java | 0 .../thrax/hadoop/datatypes/FeaturePair.java | 0 .../thrax/hadoop/datatypes/FeatureValue.java | 0 .../jhu/thrax/hadoop/datatypes/IntPair.java | 0 .../hadoop/datatypes/PrimitiveUtils.java | 0 .../thrax/hadoop/datatypes/RuleWritable.java | 0 .../jhu/thrax/hadoop/datatypes/TextPair.java | 0 .../hadoop/distributional/CommonLSH.java | 6 +- .../distributional/ContextWritable.java | 0 .../DistributionalContextCombiner.java | 0 .../DistributionalContextMapper.java | 0 
.../DistributionalContextReducer.java | 2 +- .../distributional/SignatureWritable.java | 0 .../hadoop/extraction/ExtractionCombiner.java | 0 .../hadoop/extraction/ExtractionMapper.java | 0 .../hadoop/extraction/ExtractionReducer.java | 0 .../HierarchicalRuleWritableExtractor.java | 0 .../extraction/RuleWritableExtractor.java | 0 .../RuleWritableExtractorFactory.java | 0 .../hadoop/features/AbstractnessFeature.java | 0 .../features/AdjacentNonTerminalsFeature.java | 0 .../CharacterCompressionRatioFeature.java | 0 .../CharacterCountDifferenceFeature.java | 0 .../ConsumeSourceTerminalsFeature.java | 0 .../jhu/thrax/hadoop/features/Feature.java | 0 .../hadoop/features/GlueRuleFeature.java | 0 .../hadoop/features/IdentityFeature.java | 0 .../hadoop/features/LexicalityFeature.java | 0 .../hadoop/features/MonotonicFeature.java | 0 .../hadoop/features/PhrasePenaltyFeature.java | 0 .../ProduceTargetTerminalsFeature.java | 0 .../thrax/hadoop/features/SimpleFeature.java | 0 .../hadoop/features/SimpleFeatureFactory.java | 0 .../features/SourceWordCounterFeature.java | 0 .../features/TargetWordCounterFeature.java | 0 .../features/WordCompressionRatioFeature.java | 0 .../features/WordCountDifferenceFeature.java | 0 .../features/WordLengthDifferenceFeature.java | 0 .../WordLexicalProbabilityCalculator.java | 0 .../thrax/hadoop/features/XRuleFeature.java | 0 .../features/annotation/AlignmentFeature.java | 0 .../annotation/AnnotationFeature.java | 0 .../annotation/AnnotationFeatureFactory.java | 0 .../annotation/AnnotationFeatureJob.java | 0 .../AnnotationPassthroughFeature.java | 0 .../annotation/AnnotationReducer.java | 0 .../features/annotation/CountFeature.java | 0 .../features/annotation/LogCountFeature.java | 0 .../annotation/RarityPenaltyFeature.java | 0 ...eGivenTargetLexicalProbabilityFeature.java | 0 ...tGivenSourceLexicalProbabilityFeature.java | 0 .../UnalignedSourceCounterFeature.java | 0 .../UnalignedTargetCounterFeature.java | 0 
.../mapred/LhsGivenSourcePhraseFeature.java | 0 .../mapred/LhsGivenTargetPhraseFeature.java | 0 .../features/mapred/MapReduceFeature.java | 0 .../mapred/MapReduceFeatureFactory.java | 0 .../features/mapred/SourceCountFeature.java | 0 .../mapred/SourcePhraseGivenLHSFeature.java | 0 .../SourcePhraseGivenTargetFeature.java | 0 .../SourcePhraseGivenTargetandLHSFeature.java | 0 .../features/mapred/TargetCountFeature.java | 0 .../mapred/TargetPhraseGivenLHSFeature.java | 0 .../TargetPhraseGivenSourceFeature.java | 0 .../TargetPhraseGivenSourceandLHSFeature.java | 0 .../pivot/NonAggregatingPivotedFeature.java | 0 .../pivot/PivotedAnnotationFeature.java | 0 .../hadoop/features/pivot/PivotedFeature.java | 0 .../features/pivot/PivotedFeatureFactory.java | 0 ...ivotedLexicalSourceGivenTargetFeature.java | 0 ...ivotedLexicalTargetGivenSourceFeature.java | 0 .../PivotedLhsGivenSourcePhraseFeature.java | 0 .../PivotedLhsGivenTargetPhraseFeature.java | 0 .../pivot/PivotedNegLogProbFeature.java | 0 .../pivot/PivotedRarityPenaltyFeature.java | 0 .../PivotedSourcePhraseGivenLHSFeature.java | 0 ...dSourcePhraseGivenTargetAndLHSFeature.java | 0 ...PivotedSourcePhraseGivenTargetFeature.java | 0 .../PivotedTargetPhraseGivenLHSFeature.java | 0 ...dTargetPhraseGivenSourceAndLHSFeature.java | 0 ...PivotedTargetPhraseGivenSourceFeature.java | 0 .../jhu/thrax/hadoop/jobs/DefaultValues.java | 0 .../DistributionalContextExtractionJob.java | 0 .../jobs/DistributionalContextSortingJob.java | 0 .../jhu/thrax/hadoop/jobs/ExtractionJob.java | 0 .../hadoop/jobs/FeatureCollectionJob.java | 0 .../edu/jhu/thrax/hadoop/jobs/JobState.java | 0 .../edu/jhu/thrax/hadoop/jobs/OutputJob.java | 0 .../hadoop/jobs/ParaphraseAggregationJob.java | 0 .../hadoop/jobs/ParaphrasePivotingJob.java | 0 .../edu/jhu/thrax/hadoop/jobs/Scheduler.java | 0 .../thrax/hadoop/jobs/SchedulerException.java | 0 ...urceWordGivenTargetWordProbabilityJob.java | 0 ...rgetWordGivenSourceWordProbabilityJob.java | 0 
.../edu/jhu/thrax/hadoop/jobs/ThraxJob.java | 0 .../jhu/thrax/hadoop/jobs/VocabularyJob.java | 0 .../jhu/thrax/hadoop/jobs/WordLexprobJob.java | 0 .../thrax/hadoop/output/OutputReducer.java | 0 .../paraphrasing/AggregationCombiner.java | 0 .../paraphrasing/AggregationMapper.java | 0 .../paraphrasing/AggregationReducer.java | 0 .../FeatureCollectionReducer.java | 0 .../hadoop/paraphrasing/PivotingMapper.java | 0 .../hadoop/paraphrasing/PivotingReducer.java | 0 .../thrax/hadoop/tools/ExtractionTool.java | 0 .../jhu/thrax/hadoop/tools/FeatureTool.java | 0 .../jhu/thrax/hadoop/tools/OutputTool.java | 0 ...rceWordGivenTargetWordProbabilityTool.java | 0 ...getWordGivenSourceWordProbabilityTool.java | 0 .../thrax/lexprob/HashMapLexprobTable.java | 0 .../lexprob/LexicalProbabilityTable.java | 0 .../edu/jhu/thrax/lexprob/LexprobTest.java | 0 .../lexprob/SequenceFileLexprobTable.java | 0 .../edu/jhu/thrax/lexprob/TableEntry.java | 0 .../jhu/thrax/lexprob/TrieLexprobTable.java | 0 .../edu/jhu/thrax/syntax/LatticeArray.java | 0 .../edu/jhu/thrax/syntax/ParseLattice.java | 0 .../java}/edu/jhu/thrax/syntax/ParseTree.java | 0 .../jhu/thrax/tools/ExtractPropbankRules.java | 0 .../edu/jhu/thrax/tools/JudgeParaphrases.java | 0 .../jhu/thrax/tools/ParaphraseCoverage.java | 0 .../jhu/thrax/tools/ParaphraseIntersect.java | 0 .../jhu/thrax/tools/ParaphraseOverlap.java | 0 .../edu/jhu/thrax/tools/ParaphraseScore.java | 0 .../jhu/thrax/tools/ParaphraseWordNet.java | 0 .../jhu/thrax/tools/SequenceToGrammar.java | 0 .../jhu/thrax/tools/SequenceToSignatures.java | 0 .../edu/jhu/thrax/tools/SplitAndFilter.java | 0 .../thrax/util/BackwardsCompatibility.java | 0 .../edu/jhu/thrax/util/ConfFileParser.java | 0 .../edu/jhu/thrax/util/CreateGlueGrammar.java | 0 .../thrax/util/DefaultConfigFileLoader.java | 0 .../jhu/thrax/util/ExternalizableToUtf8.java | 0 .../java}/edu/jhu/thrax/util/FormatUtils.java | 0 .../edu/jhu/thrax/util/GrammarComparison.java | 0 .../java}/edu/jhu/thrax/util/Intersect.java 
| 0 .../edu/jhu/thrax/util/MalformedInput.java | 0 .../jhu/thrax/util/MalformedInput.properties | 0 .../java}/edu/jhu/thrax/util/MurmurHash.java | 0 .../java}/edu/jhu/thrax/util/NegLogMath.java | 0 .../jhu/thrax/util/SequenceFileCreator.java | 0 .../edu/jhu/thrax/util/TestSetFilter.java | 0 .../java}/edu/jhu/thrax/util/Vocabulary.java | 0 .../util/amazon/AmazonConfigFileLoader.java | 0 .../exceptions/ConfigurationException.java | 0 .../exceptions/EmptyAlignmentException.java | 0 .../exceptions/EmptySentenceException.java | 0 .../InconsistentAlignmentException.java | 0 .../exceptions/MalformedInputException.java | 0 .../exceptions/MalformedParseException.java | 0 .../exceptions/NotEnoughFieldsException.java | 0 .../edu/jhu/thrax/util/io/InputUtilities.java | 0 .../edu/jhu/thrax/util/io/LineReader.java | 0 .../java}/edu/jhu/thrax/util/io/Reader.java | 0 .../thrax/datatypes/ArrayAlignmentTest.java | 0 .../jhu/thrax/extraction/SAMTLabelerTest.java | 0 .../edu/jhu/thrax/syntax/ParseTreeTest.java | 0 .../jhu/thrax/util/io/InputUtilitiesTest.java | 0 185 files changed, 376 insertions(+), 4 deletions(-) create mode 100644 pom.xml rename src/{ => main/java}/edu/jhu/thrax/Thrax.java (100%) rename src/{ => main/java}/edu/jhu/thrax/datatypes/AlignedSentencePair.java (100%) rename src/{ => main/java}/edu/jhu/thrax/datatypes/Alignment.java (100%) rename src/{ => main/java}/edu/jhu/thrax/datatypes/ArrayAlignment.java (100%) rename src/{ => main/java}/edu/jhu/thrax/datatypes/HierarchicalRule.java (100%) rename src/{ => main/java}/edu/jhu/thrax/datatypes/IntPair.java (100%) rename src/{ => main/java}/edu/jhu/thrax/datatypes/PhrasePair.java (100%) rename src/{ => main/java}/edu/jhu/thrax/distributional/ContextPhrase.java (100%) rename src/{ => main/java}/edu/jhu/thrax/distributional/ContextPhraseExtractor.java (100%) rename src/{ => main/java}/edu/jhu/thrax/distributional/FeatureClass.java (100%) rename src/{ => main/java}/edu/jhu/thrax/distributional/FeatureEncoder.java (100%) 
rename src/{ => main/java}/edu/jhu/thrax/distributional/FeatureSet.java (100%) rename src/{ => main/java}/edu/jhu/thrax/distributional/FeatureTypes.java (100%) rename src/{ => main/java}/edu/jhu/thrax/extraction/HierarchicalRuleExtractor.java (100%) rename src/{ => main/java}/edu/jhu/thrax/extraction/HieroLabeler.java (100%) rename src/{ => main/java}/edu/jhu/thrax/extraction/LabelCache.java (100%) rename src/{ => main/java}/edu/jhu/thrax/extraction/Labeling.java (100%) rename src/{ => main/java}/edu/jhu/thrax/extraction/ManualSpanLabeler.java (100%) rename src/{ => main/java}/edu/jhu/thrax/extraction/SAMTLabeler.java (100%) rename src/{ => main/java}/edu/jhu/thrax/extraction/SpanLabeler.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/comparators/FieldComparator.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/comparators/PrimitiveArrayMarginalComparator.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/comparators/TextMarginalComparator.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/datatypes/AlignedRuleWritable.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/datatypes/AlignmentWritable.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/datatypes/Annotation.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/datatypes/FeatureMap.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/datatypes/FeaturePair.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/datatypes/FeatureValue.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/datatypes/IntPair.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/datatypes/PrimitiveUtils.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/datatypes/RuleWritable.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/datatypes/TextPair.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/distributional/CommonLSH.java (63%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/distributional/ContextWritable.java 
(100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/distributional/DistributionalContextCombiner.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/distributional/DistributionalContextMapper.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/distributional/DistributionalContextReducer.java (95%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/distributional/SignatureWritable.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/extraction/ExtractionCombiner.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/extraction/ExtractionMapper.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/extraction/ExtractionReducer.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/extraction/HierarchicalRuleWritableExtractor.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/extraction/RuleWritableExtractor.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/extraction/RuleWritableExtractorFactory.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/AbstractnessFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/AdjacentNonTerminalsFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/CharacterCompressionRatioFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/CharacterCountDifferenceFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/ConsumeSourceTerminalsFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/Feature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/GlueRuleFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/IdentityFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/LexicalityFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/MonotonicFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/PhrasePenaltyFeature.java (100%) rename src/{ => 
main/java}/edu/jhu/thrax/hadoop/features/ProduceTargetTerminalsFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/SimpleFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/SimpleFeatureFactory.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/SourceWordCounterFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/TargetWordCounterFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/WordCompressionRatioFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/WordCountDifferenceFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/WordLengthDifferenceFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/WordLexicalProbabilityCalculator.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/XRuleFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/annotation/AlignmentFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureFactory.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureJob.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/annotation/AnnotationPassthroughFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/annotation/AnnotationReducer.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/annotation/CountFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/annotation/LogCountFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/annotation/RarityPenaltyFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/annotation/SourceGivenTargetLexicalProbabilityFeature.java (100%) rename src/{ => 
main/java}/edu/jhu/thrax/hadoop/features/annotation/TargetGivenSourceLexicalProbabilityFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/annotation/UnalignedSourceCounterFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/annotation/UnalignedTargetCounterFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/mapred/LhsGivenSourcePhraseFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/mapred/LhsGivenTargetPhraseFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeatureFactory.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/mapred/SourceCountFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenLHSFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetandLHSFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/mapred/TargetCountFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenLHSFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceandLHSFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/NonAggregatingPivotedFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedAnnotationFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedFeatureFactory.java (100%) rename src/{ => 
main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedLexicalSourceGivenTargetFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedLexicalTargetGivenSourceFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedLhsGivenSourcePhraseFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedLhsGivenTargetPhraseFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedNegLogProbFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedRarityPenaltyFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenLHSFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenTargetAndLHSFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenTargetFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenLHSFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenSourceAndLHSFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenSourceFeature.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/DefaultValues.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/DistributionalContextExtractionJob.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/DistributionalContextSortingJob.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/ExtractionJob.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/JobState.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/OutputJob.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/ParaphraseAggregationJob.java (100%) 
rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/ParaphrasePivotingJob.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/Scheduler.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/SchedulerException.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/SourceWordGivenTargetWordProbabilityJob.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/TargetWordGivenSourceWordProbabilityJob.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/ThraxJob.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/VocabularyJob.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/jobs/WordLexprobJob.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/output/OutputReducer.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/paraphrasing/AggregationCombiner.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/paraphrasing/AggregationMapper.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/paraphrasing/AggregationReducer.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/paraphrasing/FeatureCollectionReducer.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/paraphrasing/PivotingMapper.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/paraphrasing/PivotingReducer.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/tools/ExtractionTool.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/tools/FeatureTool.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/tools/OutputTool.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java (100%) rename src/{ => main/java}/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java (100%) rename src/{ => main/java}/edu/jhu/thrax/lexprob/HashMapLexprobTable.java (100%) rename src/{ => main/java}/edu/jhu/thrax/lexprob/LexicalProbabilityTable.java (100%) rename src/{ => 
main/java}/edu/jhu/thrax/lexprob/LexprobTest.java (100%) rename src/{ => main/java}/edu/jhu/thrax/lexprob/SequenceFileLexprobTable.java (100%) rename src/{ => main/java}/edu/jhu/thrax/lexprob/TableEntry.java (100%) rename src/{ => main/java}/edu/jhu/thrax/lexprob/TrieLexprobTable.java (100%) rename src/{ => main/java}/edu/jhu/thrax/syntax/LatticeArray.java (100%) rename src/{ => main/java}/edu/jhu/thrax/syntax/ParseLattice.java (100%) rename src/{ => main/java}/edu/jhu/thrax/syntax/ParseTree.java (100%) rename src/{ => main/java}/edu/jhu/thrax/tools/ExtractPropbankRules.java (100%) rename src/{ => main/java}/edu/jhu/thrax/tools/JudgeParaphrases.java (100%) rename src/{ => main/java}/edu/jhu/thrax/tools/ParaphraseCoverage.java (100%) rename src/{ => main/java}/edu/jhu/thrax/tools/ParaphraseIntersect.java (100%) rename src/{ => main/java}/edu/jhu/thrax/tools/ParaphraseOverlap.java (100%) rename src/{ => main/java}/edu/jhu/thrax/tools/ParaphraseScore.java (100%) rename src/{ => main/java}/edu/jhu/thrax/tools/ParaphraseWordNet.java (100%) rename src/{ => main/java}/edu/jhu/thrax/tools/SequenceToGrammar.java (100%) rename src/{ => main/java}/edu/jhu/thrax/tools/SequenceToSignatures.java (100%) rename src/{ => main/java}/edu/jhu/thrax/tools/SplitAndFilter.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/BackwardsCompatibility.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/ConfFileParser.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/CreateGlueGrammar.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/DefaultConfigFileLoader.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/ExternalizableToUtf8.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/FormatUtils.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/GrammarComparison.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/Intersect.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/MalformedInput.java (100%) rename src/{ => 
main/java}/edu/jhu/thrax/util/MalformedInput.properties (100%) rename src/{ => main/java}/edu/jhu/thrax/util/MurmurHash.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/NegLogMath.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/SequenceFileCreator.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/TestSetFilter.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/Vocabulary.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/amazon/AmazonConfigFileLoader.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/exceptions/ConfigurationException.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/exceptions/EmptyAlignmentException.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/exceptions/EmptySentenceException.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/exceptions/InconsistentAlignmentException.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/exceptions/MalformedInputException.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/exceptions/MalformedParseException.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/exceptions/NotEnoughFieldsException.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/io/InputUtilities.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/io/LineReader.java (100%) rename src/{ => main/java}/edu/jhu/thrax/util/io/Reader.java (100%) rename {test => src/test/java}/edu/jhu/thrax/datatypes/ArrayAlignmentTest.java (100%) rename {test => src/test/java}/edu/jhu/thrax/extraction/SAMTLabelerTest.java (100%) rename {test => src/test/java}/edu/jhu/thrax/syntax/ParseTreeTest.java (100%) rename {test => src/test/java}/edu/jhu/thrax/util/io/InputUtilitiesTest.java (100%) diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..7ddecda --- /dev/null +++ b/pom.xml @@ -0,0 +1,372 @@ + + + + + org.sonatype.oss + oss-parent + 7 + + + 4.0.0 + org.joshua-decoder + thrax + Thrax + jar + 1.0-SNAPSHOT + + + Apache License, Version 2.0 + 
http://www.apache.org/licenses/LICENSE-2.0.txt + + + + https://github.com/joshua-decoder/thrax + + Hadoop-based tool for extraction of large + scale synchronous grammars for paraphrasing and machine translation. + + + + https://github.com/joshua-decoder/thrax.git + scm:git:git://github.com/joshua-decoder/thrax.git + scm:git:git@github.com/joshua-decoder/thrax.git + HEAD + + + + + sonatype-nexus-staging + Nexus Staging Repository + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + + + GitHub Issues + https://github.com/joshua-decoder/thrax/issues + + + + + Project Mailing List + dev[at]joshua[dot]incubator[dot]apache[dot]org + + + + + Joshua Decoder + https://github.com/joshua-decoder + + + + + jweese + Jonny Weese + jonny[at]cs[dot]jhu[dot]edu + + + jganitkevitch + Juri Ganitkevitch + juri[at]cs[dot]jhu[dot]edu + + + mjpost + Matt Post + post[at]cs[dot]jhu[dot]edu + + + dowobeha + Lane Schwartz + + + lewismc + Lewis John McGibbney + lewismc[at]apache[dot]org + + + + + install + target + ${basedir}/target/classes + ${project.artifactId}-${project.version} + ${basedir}/target/test-classes + ${basedir}/src/main/java + ${basedir}/src/test/java + + + src/test/resources/ + + ** + + + + + + + org.apache.maven.plugins + maven-deploy-plugin + ${maven-deploy-plugin.version} + + + org.apache.maven.plugins + maven-release-plugin + ${maven-release-plugin.version} + + forked-path + false + -Prelease + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + de.thetaphi + forbiddenapis + [1.8,) + + testCheck + check + + + + + + + + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + ${maven-compiler-plugin.version} + + + + test-jar + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + true + + ${javac.src.version} + ${javac.target.version} + + + + org.apache.maven.plugins + maven-surefire-plugin + ${maven-surfire-plugin.version} + true + + + ${project.basedir}/target/test-data/ + + -Xmx512m + always 
+ false + + + + com.googlecode.maven-java-formatter-plugin + maven-java-formatter-plugin + 0.4 + + ${project.basedir}/doc/eclipse-formatter.xml + + + + de.thetaphi + forbiddenapis + 1.8 + + + true + + false + + jdk-unsafe + jdk-deprecated + jdk-system-out + + + + + + + check + testCheck + + + + + + + + + + release + + + + org.apache.maven.plugins + maven-source-plugin + ${maven-source-plugin.version} + + + attach-sources + + jar-no-fork + + + + + true + true + + + ${implementation.build} + ${maven.build.timestamp} + ${javac.src.version} + ${javac.target.version} + + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + ${maven-javadoc-plugin.version} + + + attach-javadocs + + jar + + + true + + + true + true + + + ${implementation.build} + ${maven.build.timestamp} + ${javac.src.version} + ${javac.target.version} + + + + + + + + org.apache.maven.plugins + maven-gpg-plugin + ${maven-gpg-plugin.version} + + + sign-artifacts + verify + + sign + + + + + + net.ju-n.maven.plugins + checksum-maven-plugin + ${checksum-maven-plugin.version} + + + + + + + + + 2.3.2 + 2.5 + 2.4 + 2.12 + 2.5.1 + 2.1.2 + 2.9.1 + 1.4 + 0.8 + 2.2.2 + 2.5 + 1.0.1 + + + ${scmBranch}@r${buildNumber} + 1.7 + 1.7 + 1.7 + yyyy-MM-dd HH:mm:ssZ + false + ${project.build.finalName} + + + + + + + com.amazonaws + aws-java-sdk + 1.1.3 + + + org.apache.commons + commons-lang3 + 3.1 + + + org.apache.hadoop + hadoop-common + 2.5.2 + + + org.apache.hadoop + hadoop-mapreduce-client-core + 2.5.2 + + + edu.jhu.hlt + jerboa + 1.0.0 + + + org.testng + testng + 5.8 + jdk15 + test + + + diff --git a/src/edu/jhu/thrax/Thrax.java b/src/main/java/edu/jhu/thrax/Thrax.java similarity index 100% rename from src/edu/jhu/thrax/Thrax.java rename to src/main/java/edu/jhu/thrax/Thrax.java diff --git a/src/edu/jhu/thrax/datatypes/AlignedSentencePair.java b/src/main/java/edu/jhu/thrax/datatypes/AlignedSentencePair.java similarity index 100% rename from src/edu/jhu/thrax/datatypes/AlignedSentencePair.java rename to 
src/main/java/edu/jhu/thrax/datatypes/AlignedSentencePair.java diff --git a/src/edu/jhu/thrax/datatypes/Alignment.java b/src/main/java/edu/jhu/thrax/datatypes/Alignment.java similarity index 100% rename from src/edu/jhu/thrax/datatypes/Alignment.java rename to src/main/java/edu/jhu/thrax/datatypes/Alignment.java diff --git a/src/edu/jhu/thrax/datatypes/ArrayAlignment.java b/src/main/java/edu/jhu/thrax/datatypes/ArrayAlignment.java similarity index 100% rename from src/edu/jhu/thrax/datatypes/ArrayAlignment.java rename to src/main/java/edu/jhu/thrax/datatypes/ArrayAlignment.java diff --git a/src/edu/jhu/thrax/datatypes/HierarchicalRule.java b/src/main/java/edu/jhu/thrax/datatypes/HierarchicalRule.java similarity index 100% rename from src/edu/jhu/thrax/datatypes/HierarchicalRule.java rename to src/main/java/edu/jhu/thrax/datatypes/HierarchicalRule.java diff --git a/src/edu/jhu/thrax/datatypes/IntPair.java b/src/main/java/edu/jhu/thrax/datatypes/IntPair.java similarity index 100% rename from src/edu/jhu/thrax/datatypes/IntPair.java rename to src/main/java/edu/jhu/thrax/datatypes/IntPair.java diff --git a/src/edu/jhu/thrax/datatypes/PhrasePair.java b/src/main/java/edu/jhu/thrax/datatypes/PhrasePair.java similarity index 100% rename from src/edu/jhu/thrax/datatypes/PhrasePair.java rename to src/main/java/edu/jhu/thrax/datatypes/PhrasePair.java diff --git a/src/edu/jhu/thrax/distributional/ContextPhrase.java b/src/main/java/edu/jhu/thrax/distributional/ContextPhrase.java similarity index 100% rename from src/edu/jhu/thrax/distributional/ContextPhrase.java rename to src/main/java/edu/jhu/thrax/distributional/ContextPhrase.java diff --git a/src/edu/jhu/thrax/distributional/ContextPhraseExtractor.java b/src/main/java/edu/jhu/thrax/distributional/ContextPhraseExtractor.java similarity index 100% rename from src/edu/jhu/thrax/distributional/ContextPhraseExtractor.java rename to src/main/java/edu/jhu/thrax/distributional/ContextPhraseExtractor.java diff --git 
a/src/edu/jhu/thrax/distributional/FeatureClass.java b/src/main/java/edu/jhu/thrax/distributional/FeatureClass.java similarity index 100% rename from src/edu/jhu/thrax/distributional/FeatureClass.java rename to src/main/java/edu/jhu/thrax/distributional/FeatureClass.java diff --git a/src/edu/jhu/thrax/distributional/FeatureEncoder.java b/src/main/java/edu/jhu/thrax/distributional/FeatureEncoder.java similarity index 100% rename from src/edu/jhu/thrax/distributional/FeatureEncoder.java rename to src/main/java/edu/jhu/thrax/distributional/FeatureEncoder.java diff --git a/src/edu/jhu/thrax/distributional/FeatureSet.java b/src/main/java/edu/jhu/thrax/distributional/FeatureSet.java similarity index 100% rename from src/edu/jhu/thrax/distributional/FeatureSet.java rename to src/main/java/edu/jhu/thrax/distributional/FeatureSet.java diff --git a/src/edu/jhu/thrax/distributional/FeatureTypes.java b/src/main/java/edu/jhu/thrax/distributional/FeatureTypes.java similarity index 100% rename from src/edu/jhu/thrax/distributional/FeatureTypes.java rename to src/main/java/edu/jhu/thrax/distributional/FeatureTypes.java diff --git a/src/edu/jhu/thrax/extraction/HierarchicalRuleExtractor.java b/src/main/java/edu/jhu/thrax/extraction/HierarchicalRuleExtractor.java similarity index 100% rename from src/edu/jhu/thrax/extraction/HierarchicalRuleExtractor.java rename to src/main/java/edu/jhu/thrax/extraction/HierarchicalRuleExtractor.java diff --git a/src/edu/jhu/thrax/extraction/HieroLabeler.java b/src/main/java/edu/jhu/thrax/extraction/HieroLabeler.java similarity index 100% rename from src/edu/jhu/thrax/extraction/HieroLabeler.java rename to src/main/java/edu/jhu/thrax/extraction/HieroLabeler.java diff --git a/src/edu/jhu/thrax/extraction/LabelCache.java b/src/main/java/edu/jhu/thrax/extraction/LabelCache.java similarity index 100% rename from src/edu/jhu/thrax/extraction/LabelCache.java rename to src/main/java/edu/jhu/thrax/extraction/LabelCache.java diff --git 
a/src/edu/jhu/thrax/extraction/Labeling.java b/src/main/java/edu/jhu/thrax/extraction/Labeling.java similarity index 100% rename from src/edu/jhu/thrax/extraction/Labeling.java rename to src/main/java/edu/jhu/thrax/extraction/Labeling.java diff --git a/src/edu/jhu/thrax/extraction/ManualSpanLabeler.java b/src/main/java/edu/jhu/thrax/extraction/ManualSpanLabeler.java similarity index 100% rename from src/edu/jhu/thrax/extraction/ManualSpanLabeler.java rename to src/main/java/edu/jhu/thrax/extraction/ManualSpanLabeler.java diff --git a/src/edu/jhu/thrax/extraction/SAMTLabeler.java b/src/main/java/edu/jhu/thrax/extraction/SAMTLabeler.java similarity index 100% rename from src/edu/jhu/thrax/extraction/SAMTLabeler.java rename to src/main/java/edu/jhu/thrax/extraction/SAMTLabeler.java diff --git a/src/edu/jhu/thrax/extraction/SpanLabeler.java b/src/main/java/edu/jhu/thrax/extraction/SpanLabeler.java similarity index 100% rename from src/edu/jhu/thrax/extraction/SpanLabeler.java rename to src/main/java/edu/jhu/thrax/extraction/SpanLabeler.java diff --git a/src/edu/jhu/thrax/hadoop/comparators/FieldComparator.java b/src/main/java/edu/jhu/thrax/hadoop/comparators/FieldComparator.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/comparators/FieldComparator.java rename to src/main/java/edu/jhu/thrax/hadoop/comparators/FieldComparator.java diff --git a/src/edu/jhu/thrax/hadoop/comparators/PrimitiveArrayMarginalComparator.java b/src/main/java/edu/jhu/thrax/hadoop/comparators/PrimitiveArrayMarginalComparator.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/comparators/PrimitiveArrayMarginalComparator.java rename to src/main/java/edu/jhu/thrax/hadoop/comparators/PrimitiveArrayMarginalComparator.java diff --git a/src/edu/jhu/thrax/hadoop/comparators/TextMarginalComparator.java b/src/main/java/edu/jhu/thrax/hadoop/comparators/TextMarginalComparator.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/comparators/TextMarginalComparator.java 
rename to src/main/java/edu/jhu/thrax/hadoop/comparators/TextMarginalComparator.java diff --git a/src/edu/jhu/thrax/hadoop/datatypes/AlignedRuleWritable.java b/src/main/java/edu/jhu/thrax/hadoop/datatypes/AlignedRuleWritable.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/datatypes/AlignedRuleWritable.java rename to src/main/java/edu/jhu/thrax/hadoop/datatypes/AlignedRuleWritable.java diff --git a/src/edu/jhu/thrax/hadoop/datatypes/AlignmentWritable.java b/src/main/java/edu/jhu/thrax/hadoop/datatypes/AlignmentWritable.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/datatypes/AlignmentWritable.java rename to src/main/java/edu/jhu/thrax/hadoop/datatypes/AlignmentWritable.java diff --git a/src/edu/jhu/thrax/hadoop/datatypes/Annotation.java b/src/main/java/edu/jhu/thrax/hadoop/datatypes/Annotation.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/datatypes/Annotation.java rename to src/main/java/edu/jhu/thrax/hadoop/datatypes/Annotation.java diff --git a/src/edu/jhu/thrax/hadoop/datatypes/FeatureMap.java b/src/main/java/edu/jhu/thrax/hadoop/datatypes/FeatureMap.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/datatypes/FeatureMap.java rename to src/main/java/edu/jhu/thrax/hadoop/datatypes/FeatureMap.java diff --git a/src/edu/jhu/thrax/hadoop/datatypes/FeaturePair.java b/src/main/java/edu/jhu/thrax/hadoop/datatypes/FeaturePair.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/datatypes/FeaturePair.java rename to src/main/java/edu/jhu/thrax/hadoop/datatypes/FeaturePair.java diff --git a/src/edu/jhu/thrax/hadoop/datatypes/FeatureValue.java b/src/main/java/edu/jhu/thrax/hadoop/datatypes/FeatureValue.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/datatypes/FeatureValue.java rename to src/main/java/edu/jhu/thrax/hadoop/datatypes/FeatureValue.java diff --git a/src/edu/jhu/thrax/hadoop/datatypes/IntPair.java b/src/main/java/edu/jhu/thrax/hadoop/datatypes/IntPair.java similarity index 
100% rename from src/edu/jhu/thrax/hadoop/datatypes/IntPair.java rename to src/main/java/edu/jhu/thrax/hadoop/datatypes/IntPair.java diff --git a/src/edu/jhu/thrax/hadoop/datatypes/PrimitiveUtils.java b/src/main/java/edu/jhu/thrax/hadoop/datatypes/PrimitiveUtils.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/datatypes/PrimitiveUtils.java rename to src/main/java/edu/jhu/thrax/hadoop/datatypes/PrimitiveUtils.java diff --git a/src/edu/jhu/thrax/hadoop/datatypes/RuleWritable.java b/src/main/java/edu/jhu/thrax/hadoop/datatypes/RuleWritable.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/datatypes/RuleWritable.java rename to src/main/java/edu/jhu/thrax/hadoop/datatypes/RuleWritable.java diff --git a/src/edu/jhu/thrax/hadoop/datatypes/TextPair.java b/src/main/java/edu/jhu/thrax/hadoop/datatypes/TextPair.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/datatypes/TextPair.java rename to src/main/java/edu/jhu/thrax/hadoop/datatypes/TextPair.java diff --git a/src/edu/jhu/thrax/hadoop/distributional/CommonLSH.java b/src/main/java/edu/jhu/thrax/hadoop/distributional/CommonLSH.java similarity index 63% rename from src/edu/jhu/thrax/hadoop/distributional/CommonLSH.java rename to src/main/java/edu/jhu/thrax/hadoop/distributional/CommonLSH.java index e5c0946..03471ad 100644 --- a/src/edu/jhu/thrax/hadoop/distributional/CommonLSH.java +++ b/src/main/java/edu/jhu/thrax/hadoop/distributional/CommonLSH.java @@ -9,9 +9,9 @@ public class CommonLSH { public static SLSH getSLSH(Configuration conf) { SLSH slsh = null; try { - slsh = new SLSH(); - slsh.initialize(conf.getInt("thrax.lsh-num-bits", 256), - conf.getInt("thrax.lsh-pool-size", 100000), conf.getInt("thrax.lsh-random-seed", 42)); + slsh = new SLSH(true); + //slsh.initialize(conf.getInt("thrax.lsh-num-bits", 256), + // conf.getInt("thrax.lsh-pool-size", 100000), conf.getInt("thrax.lsh-random-seed", 42)); } catch (Exception e) { e.printStackTrace(); System.exit(1); diff --git 
a/src/edu/jhu/thrax/hadoop/distributional/ContextWritable.java b/src/main/java/edu/jhu/thrax/hadoop/distributional/ContextWritable.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/distributional/ContextWritable.java rename to src/main/java/edu/jhu/thrax/hadoop/distributional/ContextWritable.java diff --git a/src/edu/jhu/thrax/hadoop/distributional/DistributionalContextCombiner.java b/src/main/java/edu/jhu/thrax/hadoop/distributional/DistributionalContextCombiner.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/distributional/DistributionalContextCombiner.java rename to src/main/java/edu/jhu/thrax/hadoop/distributional/DistributionalContextCombiner.java diff --git a/src/edu/jhu/thrax/hadoop/distributional/DistributionalContextMapper.java b/src/main/java/edu/jhu/thrax/hadoop/distributional/DistributionalContextMapper.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/distributional/DistributionalContextMapper.java rename to src/main/java/edu/jhu/thrax/hadoop/distributional/DistributionalContextMapper.java diff --git a/src/edu/jhu/thrax/hadoop/distributional/DistributionalContextReducer.java b/src/main/java/edu/jhu/thrax/hadoop/distributional/DistributionalContextReducer.java similarity index 95% rename from src/edu/jhu/thrax/hadoop/distributional/DistributionalContextReducer.java rename to src/main/java/edu/jhu/thrax/hadoop/distributional/DistributionalContextReducer.java index f117f51..86c1726 100644 --- a/src/edu/jhu/thrax/hadoop/distributional/DistributionalContextReducer.java +++ b/src/main/java/edu/jhu/thrax/hadoop/distributional/DistributionalContextReducer.java @@ -33,7 +33,7 @@ protected void reduce(Text key, Iterable values, Context contex Signature reduced_signature = new Signature(); // TODO: double-check need for deep copy? 
reduced_signature.sums = reduced.sums; - slsh.buildSignature(reduced_signature, false); + slsh.buildSignature(reduced_signature.toString(), false); context.write(new SignatureWritable(key, reduced_signature, reduced.strength.get()), NullWritable.get()); } diff --git a/src/edu/jhu/thrax/hadoop/distributional/SignatureWritable.java b/src/main/java/edu/jhu/thrax/hadoop/distributional/SignatureWritable.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/distributional/SignatureWritable.java rename to src/main/java/edu/jhu/thrax/hadoop/distributional/SignatureWritable.java diff --git a/src/edu/jhu/thrax/hadoop/extraction/ExtractionCombiner.java b/src/main/java/edu/jhu/thrax/hadoop/extraction/ExtractionCombiner.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/extraction/ExtractionCombiner.java rename to src/main/java/edu/jhu/thrax/hadoop/extraction/ExtractionCombiner.java diff --git a/src/edu/jhu/thrax/hadoop/extraction/ExtractionMapper.java b/src/main/java/edu/jhu/thrax/hadoop/extraction/ExtractionMapper.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/extraction/ExtractionMapper.java rename to src/main/java/edu/jhu/thrax/hadoop/extraction/ExtractionMapper.java diff --git a/src/edu/jhu/thrax/hadoop/extraction/ExtractionReducer.java b/src/main/java/edu/jhu/thrax/hadoop/extraction/ExtractionReducer.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/extraction/ExtractionReducer.java rename to src/main/java/edu/jhu/thrax/hadoop/extraction/ExtractionReducer.java diff --git a/src/edu/jhu/thrax/hadoop/extraction/HierarchicalRuleWritableExtractor.java b/src/main/java/edu/jhu/thrax/hadoop/extraction/HierarchicalRuleWritableExtractor.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/extraction/HierarchicalRuleWritableExtractor.java rename to src/main/java/edu/jhu/thrax/hadoop/extraction/HierarchicalRuleWritableExtractor.java diff --git a/src/edu/jhu/thrax/hadoop/extraction/RuleWritableExtractor.java 
b/src/main/java/edu/jhu/thrax/hadoop/extraction/RuleWritableExtractor.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/extraction/RuleWritableExtractor.java rename to src/main/java/edu/jhu/thrax/hadoop/extraction/RuleWritableExtractor.java diff --git a/src/edu/jhu/thrax/hadoop/extraction/RuleWritableExtractorFactory.java b/src/main/java/edu/jhu/thrax/hadoop/extraction/RuleWritableExtractorFactory.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/extraction/RuleWritableExtractorFactory.java rename to src/main/java/edu/jhu/thrax/hadoop/extraction/RuleWritableExtractorFactory.java diff --git a/src/edu/jhu/thrax/hadoop/features/AbstractnessFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/AbstractnessFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/AbstractnessFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/AbstractnessFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/AdjacentNonTerminalsFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/AdjacentNonTerminalsFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/AdjacentNonTerminalsFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/AdjacentNonTerminalsFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/CharacterCompressionRatioFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/CharacterCompressionRatioFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/CharacterCompressionRatioFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/CharacterCompressionRatioFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/CharacterCountDifferenceFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/CharacterCountDifferenceFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/CharacterCountDifferenceFeature.java rename to 
src/main/java/edu/jhu/thrax/hadoop/features/CharacterCountDifferenceFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/ConsumeSourceTerminalsFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/ConsumeSourceTerminalsFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/ConsumeSourceTerminalsFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/ConsumeSourceTerminalsFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/Feature.java b/src/main/java/edu/jhu/thrax/hadoop/features/Feature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/Feature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/Feature.java diff --git a/src/edu/jhu/thrax/hadoop/features/GlueRuleFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/GlueRuleFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/GlueRuleFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/GlueRuleFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/IdentityFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/IdentityFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/IdentityFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/IdentityFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/LexicalityFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/LexicalityFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/LexicalityFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/LexicalityFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/MonotonicFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/MonotonicFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/MonotonicFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/MonotonicFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/PhrasePenaltyFeature.java 
b/src/main/java/edu/jhu/thrax/hadoop/features/PhrasePenaltyFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/PhrasePenaltyFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/PhrasePenaltyFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/ProduceTargetTerminalsFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/ProduceTargetTerminalsFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/ProduceTargetTerminalsFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/ProduceTargetTerminalsFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/SimpleFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/SimpleFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/SimpleFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/SimpleFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/SimpleFeatureFactory.java b/src/main/java/edu/jhu/thrax/hadoop/features/SimpleFeatureFactory.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/SimpleFeatureFactory.java rename to src/main/java/edu/jhu/thrax/hadoop/features/SimpleFeatureFactory.java diff --git a/src/edu/jhu/thrax/hadoop/features/SourceWordCounterFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/SourceWordCounterFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/SourceWordCounterFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/SourceWordCounterFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/TargetWordCounterFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/TargetWordCounterFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/TargetWordCounterFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/TargetWordCounterFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/WordCompressionRatioFeature.java 
b/src/main/java/edu/jhu/thrax/hadoop/features/WordCompressionRatioFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/WordCompressionRatioFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/WordCompressionRatioFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/WordCountDifferenceFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/WordCountDifferenceFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/WordCountDifferenceFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/WordCountDifferenceFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/WordLengthDifferenceFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/WordLengthDifferenceFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/WordLengthDifferenceFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/WordLengthDifferenceFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/WordLexicalProbabilityCalculator.java b/src/main/java/edu/jhu/thrax/hadoop/features/WordLexicalProbabilityCalculator.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/WordLexicalProbabilityCalculator.java rename to src/main/java/edu/jhu/thrax/hadoop/features/WordLexicalProbabilityCalculator.java diff --git a/src/edu/jhu/thrax/hadoop/features/XRuleFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/XRuleFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/XRuleFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/XRuleFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/annotation/AlignmentFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/AlignmentFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/annotation/AlignmentFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/AlignmentFeature.java diff --git 
a/src/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureFactory.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureFactory.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureFactory.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureFactory.java diff --git a/src/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureJob.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureJob.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureJob.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureJob.java diff --git a/src/edu/jhu/thrax/hadoop/features/annotation/AnnotationPassthroughFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationPassthroughFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/annotation/AnnotationPassthroughFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationPassthroughFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/annotation/AnnotationReducer.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationReducer.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/annotation/AnnotationReducer.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationReducer.java diff --git a/src/edu/jhu/thrax/hadoop/features/annotation/CountFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/CountFeature.java similarity index 
100% rename from src/edu/jhu/thrax/hadoop/features/annotation/CountFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/CountFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/annotation/LogCountFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/LogCountFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/annotation/LogCountFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/LogCountFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/annotation/RarityPenaltyFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/RarityPenaltyFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/annotation/RarityPenaltyFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/RarityPenaltyFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/annotation/SourceGivenTargetLexicalProbabilityFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/SourceGivenTargetLexicalProbabilityFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/annotation/SourceGivenTargetLexicalProbabilityFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/SourceGivenTargetLexicalProbabilityFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/annotation/TargetGivenSourceLexicalProbabilityFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/TargetGivenSourceLexicalProbabilityFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/annotation/TargetGivenSourceLexicalProbabilityFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/TargetGivenSourceLexicalProbabilityFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/annotation/UnalignedSourceCounterFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/UnalignedSourceCounterFeature.java similarity index 100% rename from 
src/edu/jhu/thrax/hadoop/features/annotation/UnalignedSourceCounterFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/UnalignedSourceCounterFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/annotation/UnalignedTargetCounterFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/UnalignedTargetCounterFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/annotation/UnalignedTargetCounterFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/annotation/UnalignedTargetCounterFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/LhsGivenSourcePhraseFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/LhsGivenSourcePhraseFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/mapred/LhsGivenSourcePhraseFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/mapred/LhsGivenSourcePhraseFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/LhsGivenTargetPhraseFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/LhsGivenTargetPhraseFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/mapred/LhsGivenTargetPhraseFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/mapred/LhsGivenTargetPhraseFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeatureFactory.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeatureFactory.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeatureFactory.java rename to 
src/main/java/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeatureFactory.java diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/SourceCountFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/SourceCountFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/mapred/SourceCountFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/mapred/SourceCountFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenLHSFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenLHSFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenLHSFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenLHSFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetandLHSFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetandLHSFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetandLHSFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetandLHSFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/TargetCountFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/TargetCountFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/mapred/TargetCountFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/mapred/TargetCountFeature.java diff --git 
a/src/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenLHSFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenLHSFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenLHSFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenLHSFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceandLHSFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceandLHSFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceandLHSFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceandLHSFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/NonAggregatingPivotedFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/NonAggregatingPivotedFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/NonAggregatingPivotedFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/NonAggregatingPivotedFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedAnnotationFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedAnnotationFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedAnnotationFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedAnnotationFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedFeature.java 
b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedFeatureFactory.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedFeatureFactory.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedFeatureFactory.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedFeatureFactory.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedLexicalSourceGivenTargetFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedLexicalSourceGivenTargetFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedLexicalSourceGivenTargetFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedLexicalSourceGivenTargetFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedLexicalTargetGivenSourceFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedLexicalTargetGivenSourceFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedLexicalTargetGivenSourceFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedLexicalTargetGivenSourceFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedLhsGivenSourcePhraseFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedLhsGivenSourcePhraseFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedLhsGivenSourcePhraseFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedLhsGivenSourcePhraseFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedLhsGivenTargetPhraseFeature.java 
b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedLhsGivenTargetPhraseFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedLhsGivenTargetPhraseFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedLhsGivenTargetPhraseFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedNegLogProbFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedNegLogProbFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedNegLogProbFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedNegLogProbFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedRarityPenaltyFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedRarityPenaltyFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedRarityPenaltyFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedRarityPenaltyFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenLHSFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenLHSFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenLHSFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenLHSFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenTargetAndLHSFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenTargetAndLHSFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenTargetAndLHSFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenTargetAndLHSFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenTargetFeature.java 
b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenTargetFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenTargetFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedSourcePhraseGivenTargetFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenLHSFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenLHSFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenLHSFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenLHSFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenSourceAndLHSFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenSourceAndLHSFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenSourceAndLHSFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenSourceAndLHSFeature.java diff --git a/src/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenSourceFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenSourceFeature.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenSourceFeature.java rename to src/main/java/edu/jhu/thrax/hadoop/features/pivot/PivotedTargetPhraseGivenSourceFeature.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/DefaultValues.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/DefaultValues.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/DefaultValues.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/DefaultValues.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/DistributionalContextExtractionJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/DistributionalContextExtractionJob.java 
similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/DistributionalContextExtractionJob.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/DistributionalContextExtractionJob.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/DistributionalContextSortingJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/DistributionalContextSortingJob.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/DistributionalContextSortingJob.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/DistributionalContextSortingJob.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/ExtractionJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/ExtractionJob.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/ExtractionJob.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/ExtractionJob.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/JobState.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/JobState.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/JobState.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/JobState.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/OutputJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/OutputJob.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/OutputJob.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/OutputJob.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/ParaphraseAggregationJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/ParaphraseAggregationJob.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/ParaphraseAggregationJob.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/ParaphraseAggregationJob.java diff --git 
a/src/edu/jhu/thrax/hadoop/jobs/ParaphrasePivotingJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/ParaphrasePivotingJob.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/ParaphrasePivotingJob.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/ParaphrasePivotingJob.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/Scheduler.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/Scheduler.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/Scheduler.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/Scheduler.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/SchedulerException.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/SchedulerException.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/SchedulerException.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/SchedulerException.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/SourceWordGivenTargetWordProbabilityJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/SourceWordGivenTargetWordProbabilityJob.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/SourceWordGivenTargetWordProbabilityJob.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/SourceWordGivenTargetWordProbabilityJob.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/TargetWordGivenSourceWordProbabilityJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/TargetWordGivenSourceWordProbabilityJob.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/TargetWordGivenSourceWordProbabilityJob.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/TargetWordGivenSourceWordProbabilityJob.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/ThraxJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/ThraxJob.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/ThraxJob.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/ThraxJob.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/VocabularyJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/VocabularyJob.java similarity 
index 100% rename from src/edu/jhu/thrax/hadoop/jobs/VocabularyJob.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/VocabularyJob.java diff --git a/src/edu/jhu/thrax/hadoop/jobs/WordLexprobJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/WordLexprobJob.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/jobs/WordLexprobJob.java rename to src/main/java/edu/jhu/thrax/hadoop/jobs/WordLexprobJob.java diff --git a/src/edu/jhu/thrax/hadoop/output/OutputReducer.java b/src/main/java/edu/jhu/thrax/hadoop/output/OutputReducer.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/output/OutputReducer.java rename to src/main/java/edu/jhu/thrax/hadoop/output/OutputReducer.java diff --git a/src/edu/jhu/thrax/hadoop/paraphrasing/AggregationCombiner.java b/src/main/java/edu/jhu/thrax/hadoop/paraphrasing/AggregationCombiner.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/paraphrasing/AggregationCombiner.java rename to src/main/java/edu/jhu/thrax/hadoop/paraphrasing/AggregationCombiner.java diff --git a/src/edu/jhu/thrax/hadoop/paraphrasing/AggregationMapper.java b/src/main/java/edu/jhu/thrax/hadoop/paraphrasing/AggregationMapper.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/paraphrasing/AggregationMapper.java rename to src/main/java/edu/jhu/thrax/hadoop/paraphrasing/AggregationMapper.java diff --git a/src/edu/jhu/thrax/hadoop/paraphrasing/AggregationReducer.java b/src/main/java/edu/jhu/thrax/hadoop/paraphrasing/AggregationReducer.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/paraphrasing/AggregationReducer.java rename to src/main/java/edu/jhu/thrax/hadoop/paraphrasing/AggregationReducer.java diff --git a/src/edu/jhu/thrax/hadoop/paraphrasing/FeatureCollectionReducer.java b/src/main/java/edu/jhu/thrax/hadoop/paraphrasing/FeatureCollectionReducer.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/paraphrasing/FeatureCollectionReducer.java rename to 
src/main/java/edu/jhu/thrax/hadoop/paraphrasing/FeatureCollectionReducer.java diff --git a/src/edu/jhu/thrax/hadoop/paraphrasing/PivotingMapper.java b/src/main/java/edu/jhu/thrax/hadoop/paraphrasing/PivotingMapper.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/paraphrasing/PivotingMapper.java rename to src/main/java/edu/jhu/thrax/hadoop/paraphrasing/PivotingMapper.java diff --git a/src/edu/jhu/thrax/hadoop/paraphrasing/PivotingReducer.java b/src/main/java/edu/jhu/thrax/hadoop/paraphrasing/PivotingReducer.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/paraphrasing/PivotingReducer.java rename to src/main/java/edu/jhu/thrax/hadoop/paraphrasing/PivotingReducer.java diff --git a/src/edu/jhu/thrax/hadoop/tools/ExtractionTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/ExtractionTool.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/tools/ExtractionTool.java rename to src/main/java/edu/jhu/thrax/hadoop/tools/ExtractionTool.java diff --git a/src/edu/jhu/thrax/hadoop/tools/FeatureTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/FeatureTool.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/tools/FeatureTool.java rename to src/main/java/edu/jhu/thrax/hadoop/tools/FeatureTool.java diff --git a/src/edu/jhu/thrax/hadoop/tools/OutputTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/OutputTool.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/tools/OutputTool.java rename to src/main/java/edu/jhu/thrax/hadoop/tools/OutputTool.java diff --git a/src/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java rename to src/main/java/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java diff --git 
a/src/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java similarity index 100% rename from src/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java rename to src/main/java/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java diff --git a/src/edu/jhu/thrax/lexprob/HashMapLexprobTable.java b/src/main/java/edu/jhu/thrax/lexprob/HashMapLexprobTable.java similarity index 100% rename from src/edu/jhu/thrax/lexprob/HashMapLexprobTable.java rename to src/main/java/edu/jhu/thrax/lexprob/HashMapLexprobTable.java diff --git a/src/edu/jhu/thrax/lexprob/LexicalProbabilityTable.java b/src/main/java/edu/jhu/thrax/lexprob/LexicalProbabilityTable.java similarity index 100% rename from src/edu/jhu/thrax/lexprob/LexicalProbabilityTable.java rename to src/main/java/edu/jhu/thrax/lexprob/LexicalProbabilityTable.java diff --git a/src/edu/jhu/thrax/lexprob/LexprobTest.java b/src/main/java/edu/jhu/thrax/lexprob/LexprobTest.java similarity index 100% rename from src/edu/jhu/thrax/lexprob/LexprobTest.java rename to src/main/java/edu/jhu/thrax/lexprob/LexprobTest.java diff --git a/src/edu/jhu/thrax/lexprob/SequenceFileLexprobTable.java b/src/main/java/edu/jhu/thrax/lexprob/SequenceFileLexprobTable.java similarity index 100% rename from src/edu/jhu/thrax/lexprob/SequenceFileLexprobTable.java rename to src/main/java/edu/jhu/thrax/lexprob/SequenceFileLexprobTable.java diff --git a/src/edu/jhu/thrax/lexprob/TableEntry.java b/src/main/java/edu/jhu/thrax/lexprob/TableEntry.java similarity index 100% rename from src/edu/jhu/thrax/lexprob/TableEntry.java rename to src/main/java/edu/jhu/thrax/lexprob/TableEntry.java diff --git a/src/edu/jhu/thrax/lexprob/TrieLexprobTable.java b/src/main/java/edu/jhu/thrax/lexprob/TrieLexprobTable.java similarity index 100% rename from src/edu/jhu/thrax/lexprob/TrieLexprobTable.java rename to 
src/main/java/edu/jhu/thrax/lexprob/TrieLexprobTable.java diff --git a/src/edu/jhu/thrax/syntax/LatticeArray.java b/src/main/java/edu/jhu/thrax/syntax/LatticeArray.java similarity index 100% rename from src/edu/jhu/thrax/syntax/LatticeArray.java rename to src/main/java/edu/jhu/thrax/syntax/LatticeArray.java diff --git a/src/edu/jhu/thrax/syntax/ParseLattice.java b/src/main/java/edu/jhu/thrax/syntax/ParseLattice.java similarity index 100% rename from src/edu/jhu/thrax/syntax/ParseLattice.java rename to src/main/java/edu/jhu/thrax/syntax/ParseLattice.java diff --git a/src/edu/jhu/thrax/syntax/ParseTree.java b/src/main/java/edu/jhu/thrax/syntax/ParseTree.java similarity index 100% rename from src/edu/jhu/thrax/syntax/ParseTree.java rename to src/main/java/edu/jhu/thrax/syntax/ParseTree.java diff --git a/src/edu/jhu/thrax/tools/ExtractPropbankRules.java b/src/main/java/edu/jhu/thrax/tools/ExtractPropbankRules.java similarity index 100% rename from src/edu/jhu/thrax/tools/ExtractPropbankRules.java rename to src/main/java/edu/jhu/thrax/tools/ExtractPropbankRules.java diff --git a/src/edu/jhu/thrax/tools/JudgeParaphrases.java b/src/main/java/edu/jhu/thrax/tools/JudgeParaphrases.java similarity index 100% rename from src/edu/jhu/thrax/tools/JudgeParaphrases.java rename to src/main/java/edu/jhu/thrax/tools/JudgeParaphrases.java diff --git a/src/edu/jhu/thrax/tools/ParaphraseCoverage.java b/src/main/java/edu/jhu/thrax/tools/ParaphraseCoverage.java similarity index 100% rename from src/edu/jhu/thrax/tools/ParaphraseCoverage.java rename to src/main/java/edu/jhu/thrax/tools/ParaphraseCoverage.java diff --git a/src/edu/jhu/thrax/tools/ParaphraseIntersect.java b/src/main/java/edu/jhu/thrax/tools/ParaphraseIntersect.java similarity index 100% rename from src/edu/jhu/thrax/tools/ParaphraseIntersect.java rename to src/main/java/edu/jhu/thrax/tools/ParaphraseIntersect.java diff --git a/src/edu/jhu/thrax/tools/ParaphraseOverlap.java 
b/src/main/java/edu/jhu/thrax/tools/ParaphraseOverlap.java similarity index 100% rename from src/edu/jhu/thrax/tools/ParaphraseOverlap.java rename to src/main/java/edu/jhu/thrax/tools/ParaphraseOverlap.java diff --git a/src/edu/jhu/thrax/tools/ParaphraseScore.java b/src/main/java/edu/jhu/thrax/tools/ParaphraseScore.java similarity index 100% rename from src/edu/jhu/thrax/tools/ParaphraseScore.java rename to src/main/java/edu/jhu/thrax/tools/ParaphraseScore.java diff --git a/src/edu/jhu/thrax/tools/ParaphraseWordNet.java b/src/main/java/edu/jhu/thrax/tools/ParaphraseWordNet.java similarity index 100% rename from src/edu/jhu/thrax/tools/ParaphraseWordNet.java rename to src/main/java/edu/jhu/thrax/tools/ParaphraseWordNet.java diff --git a/src/edu/jhu/thrax/tools/SequenceToGrammar.java b/src/main/java/edu/jhu/thrax/tools/SequenceToGrammar.java similarity index 100% rename from src/edu/jhu/thrax/tools/SequenceToGrammar.java rename to src/main/java/edu/jhu/thrax/tools/SequenceToGrammar.java diff --git a/src/edu/jhu/thrax/tools/SequenceToSignatures.java b/src/main/java/edu/jhu/thrax/tools/SequenceToSignatures.java similarity index 100% rename from src/edu/jhu/thrax/tools/SequenceToSignatures.java rename to src/main/java/edu/jhu/thrax/tools/SequenceToSignatures.java diff --git a/src/edu/jhu/thrax/tools/SplitAndFilter.java b/src/main/java/edu/jhu/thrax/tools/SplitAndFilter.java similarity index 100% rename from src/edu/jhu/thrax/tools/SplitAndFilter.java rename to src/main/java/edu/jhu/thrax/tools/SplitAndFilter.java diff --git a/src/edu/jhu/thrax/util/BackwardsCompatibility.java b/src/main/java/edu/jhu/thrax/util/BackwardsCompatibility.java similarity index 100% rename from src/edu/jhu/thrax/util/BackwardsCompatibility.java rename to src/main/java/edu/jhu/thrax/util/BackwardsCompatibility.java diff --git a/src/edu/jhu/thrax/util/ConfFileParser.java b/src/main/java/edu/jhu/thrax/util/ConfFileParser.java similarity index 100% rename from 
src/edu/jhu/thrax/util/ConfFileParser.java rename to src/main/java/edu/jhu/thrax/util/ConfFileParser.java diff --git a/src/edu/jhu/thrax/util/CreateGlueGrammar.java b/src/main/java/edu/jhu/thrax/util/CreateGlueGrammar.java similarity index 100% rename from src/edu/jhu/thrax/util/CreateGlueGrammar.java rename to src/main/java/edu/jhu/thrax/util/CreateGlueGrammar.java diff --git a/src/edu/jhu/thrax/util/DefaultConfigFileLoader.java b/src/main/java/edu/jhu/thrax/util/DefaultConfigFileLoader.java similarity index 100% rename from src/edu/jhu/thrax/util/DefaultConfigFileLoader.java rename to src/main/java/edu/jhu/thrax/util/DefaultConfigFileLoader.java diff --git a/src/edu/jhu/thrax/util/ExternalizableToUtf8.java b/src/main/java/edu/jhu/thrax/util/ExternalizableToUtf8.java similarity index 100% rename from src/edu/jhu/thrax/util/ExternalizableToUtf8.java rename to src/main/java/edu/jhu/thrax/util/ExternalizableToUtf8.java diff --git a/src/edu/jhu/thrax/util/FormatUtils.java b/src/main/java/edu/jhu/thrax/util/FormatUtils.java similarity index 100% rename from src/edu/jhu/thrax/util/FormatUtils.java rename to src/main/java/edu/jhu/thrax/util/FormatUtils.java diff --git a/src/edu/jhu/thrax/util/GrammarComparison.java b/src/main/java/edu/jhu/thrax/util/GrammarComparison.java similarity index 100% rename from src/edu/jhu/thrax/util/GrammarComparison.java rename to src/main/java/edu/jhu/thrax/util/GrammarComparison.java diff --git a/src/edu/jhu/thrax/util/Intersect.java b/src/main/java/edu/jhu/thrax/util/Intersect.java similarity index 100% rename from src/edu/jhu/thrax/util/Intersect.java rename to src/main/java/edu/jhu/thrax/util/Intersect.java diff --git a/src/edu/jhu/thrax/util/MalformedInput.java b/src/main/java/edu/jhu/thrax/util/MalformedInput.java similarity index 100% rename from src/edu/jhu/thrax/util/MalformedInput.java rename to src/main/java/edu/jhu/thrax/util/MalformedInput.java diff --git a/src/edu/jhu/thrax/util/MalformedInput.properties 
b/src/main/java/edu/jhu/thrax/util/MalformedInput.properties similarity index 100% rename from src/edu/jhu/thrax/util/MalformedInput.properties rename to src/main/java/edu/jhu/thrax/util/MalformedInput.properties diff --git a/src/edu/jhu/thrax/util/MurmurHash.java b/src/main/java/edu/jhu/thrax/util/MurmurHash.java similarity index 100% rename from src/edu/jhu/thrax/util/MurmurHash.java rename to src/main/java/edu/jhu/thrax/util/MurmurHash.java diff --git a/src/edu/jhu/thrax/util/NegLogMath.java b/src/main/java/edu/jhu/thrax/util/NegLogMath.java similarity index 100% rename from src/edu/jhu/thrax/util/NegLogMath.java rename to src/main/java/edu/jhu/thrax/util/NegLogMath.java diff --git a/src/edu/jhu/thrax/util/SequenceFileCreator.java b/src/main/java/edu/jhu/thrax/util/SequenceFileCreator.java similarity index 100% rename from src/edu/jhu/thrax/util/SequenceFileCreator.java rename to src/main/java/edu/jhu/thrax/util/SequenceFileCreator.java diff --git a/src/edu/jhu/thrax/util/TestSetFilter.java b/src/main/java/edu/jhu/thrax/util/TestSetFilter.java similarity index 100% rename from src/edu/jhu/thrax/util/TestSetFilter.java rename to src/main/java/edu/jhu/thrax/util/TestSetFilter.java diff --git a/src/edu/jhu/thrax/util/Vocabulary.java b/src/main/java/edu/jhu/thrax/util/Vocabulary.java similarity index 100% rename from src/edu/jhu/thrax/util/Vocabulary.java rename to src/main/java/edu/jhu/thrax/util/Vocabulary.java diff --git a/src/edu/jhu/thrax/util/amazon/AmazonConfigFileLoader.java b/src/main/java/edu/jhu/thrax/util/amazon/AmazonConfigFileLoader.java similarity index 100% rename from src/edu/jhu/thrax/util/amazon/AmazonConfigFileLoader.java rename to src/main/java/edu/jhu/thrax/util/amazon/AmazonConfigFileLoader.java diff --git a/src/edu/jhu/thrax/util/exceptions/ConfigurationException.java b/src/main/java/edu/jhu/thrax/util/exceptions/ConfigurationException.java similarity index 100% rename from src/edu/jhu/thrax/util/exceptions/ConfigurationException.java rename 
to src/main/java/edu/jhu/thrax/util/exceptions/ConfigurationException.java diff --git a/src/edu/jhu/thrax/util/exceptions/EmptyAlignmentException.java b/src/main/java/edu/jhu/thrax/util/exceptions/EmptyAlignmentException.java similarity index 100% rename from src/edu/jhu/thrax/util/exceptions/EmptyAlignmentException.java rename to src/main/java/edu/jhu/thrax/util/exceptions/EmptyAlignmentException.java diff --git a/src/edu/jhu/thrax/util/exceptions/EmptySentenceException.java b/src/main/java/edu/jhu/thrax/util/exceptions/EmptySentenceException.java similarity index 100% rename from src/edu/jhu/thrax/util/exceptions/EmptySentenceException.java rename to src/main/java/edu/jhu/thrax/util/exceptions/EmptySentenceException.java diff --git a/src/edu/jhu/thrax/util/exceptions/InconsistentAlignmentException.java b/src/main/java/edu/jhu/thrax/util/exceptions/InconsistentAlignmentException.java similarity index 100% rename from src/edu/jhu/thrax/util/exceptions/InconsistentAlignmentException.java rename to src/main/java/edu/jhu/thrax/util/exceptions/InconsistentAlignmentException.java diff --git a/src/edu/jhu/thrax/util/exceptions/MalformedInputException.java b/src/main/java/edu/jhu/thrax/util/exceptions/MalformedInputException.java similarity index 100% rename from src/edu/jhu/thrax/util/exceptions/MalformedInputException.java rename to src/main/java/edu/jhu/thrax/util/exceptions/MalformedInputException.java diff --git a/src/edu/jhu/thrax/util/exceptions/MalformedParseException.java b/src/main/java/edu/jhu/thrax/util/exceptions/MalformedParseException.java similarity index 100% rename from src/edu/jhu/thrax/util/exceptions/MalformedParseException.java rename to src/main/java/edu/jhu/thrax/util/exceptions/MalformedParseException.java diff --git a/src/edu/jhu/thrax/util/exceptions/NotEnoughFieldsException.java b/src/main/java/edu/jhu/thrax/util/exceptions/NotEnoughFieldsException.java similarity index 100% rename from 
src/edu/jhu/thrax/util/exceptions/NotEnoughFieldsException.java rename to src/main/java/edu/jhu/thrax/util/exceptions/NotEnoughFieldsException.java diff --git a/src/edu/jhu/thrax/util/io/InputUtilities.java b/src/main/java/edu/jhu/thrax/util/io/InputUtilities.java similarity index 100% rename from src/edu/jhu/thrax/util/io/InputUtilities.java rename to src/main/java/edu/jhu/thrax/util/io/InputUtilities.java diff --git a/src/edu/jhu/thrax/util/io/LineReader.java b/src/main/java/edu/jhu/thrax/util/io/LineReader.java similarity index 100% rename from src/edu/jhu/thrax/util/io/LineReader.java rename to src/main/java/edu/jhu/thrax/util/io/LineReader.java diff --git a/src/edu/jhu/thrax/util/io/Reader.java b/src/main/java/edu/jhu/thrax/util/io/Reader.java similarity index 100% rename from src/edu/jhu/thrax/util/io/Reader.java rename to src/main/java/edu/jhu/thrax/util/io/Reader.java diff --git a/test/edu/jhu/thrax/datatypes/ArrayAlignmentTest.java b/src/test/java/edu/jhu/thrax/datatypes/ArrayAlignmentTest.java similarity index 100% rename from test/edu/jhu/thrax/datatypes/ArrayAlignmentTest.java rename to src/test/java/edu/jhu/thrax/datatypes/ArrayAlignmentTest.java diff --git a/test/edu/jhu/thrax/extraction/SAMTLabelerTest.java b/src/test/java/edu/jhu/thrax/extraction/SAMTLabelerTest.java similarity index 100% rename from test/edu/jhu/thrax/extraction/SAMTLabelerTest.java rename to src/test/java/edu/jhu/thrax/extraction/SAMTLabelerTest.java diff --git a/test/edu/jhu/thrax/syntax/ParseTreeTest.java b/src/test/java/edu/jhu/thrax/syntax/ParseTreeTest.java similarity index 100% rename from test/edu/jhu/thrax/syntax/ParseTreeTest.java rename to src/test/java/edu/jhu/thrax/syntax/ParseTreeTest.java diff --git a/test/edu/jhu/thrax/util/io/InputUtilitiesTest.java b/src/test/java/edu/jhu/thrax/util/io/InputUtilitiesTest.java similarity index 100% rename from test/edu/jhu/thrax/util/io/InputUtilitiesTest.java rename to src/test/java/edu/jhu/thrax/util/io/InputUtilitiesTest.java 
From c4aa677d8e46bc07686ddf65a1cb4a3d7da65d9c Mon Sep 17 00:00:00 2001 From: Lewis John McGibbney Date: Fri, 13 May 2016 20:36:33 -0700 Subject: [PATCH 2/6] THRAX-11 Publish thrax to Sonatype --- pom.xml | 625 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 312 insertions(+), 313 deletions(-) diff --git a/pom.xml b/pom.xml index 7ddecda..0dc9c97 100644 --- a/pom.xml +++ b/pom.xml @@ -1,341 +1,340 @@ - + - - org.sonatype.oss - oss-parent - 7 - + + org.sonatype.oss + oss-parent + 7 + - 4.0.0 - org.joshua-decoder - thrax - Thrax - jar - 1.0-SNAPSHOT - - - Apache License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt - - + 4.0.0 + org.joshua-decoder + thrax + Thrax + jar + 1.0-SNAPSHOT + + + Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + + - https://github.com/joshua-decoder/thrax + https://github.com/joshua-decoder/thrax - Hadoop-based tool for extraction of large + Hadoop-based tool for extraction of large scale synchronous grammars for paraphrasing and machine translation. 
- - https://github.com/joshua-decoder/thrax.git - scm:git:git://github.com/joshua-decoder/thrax.git - scm:git:git@github.com/joshua-decoder/thrax.git - HEAD + + https://github.com/joshua-decoder/thrax.git + scm:git:git://github.com/joshua-decoder/thrax.git + scm:git:git@github.com/joshua-decoder/thrax.git + HEAD - - - sonatype-nexus-staging - Nexus Staging Repository - https://oss.sonatype.org/service/local/staging/deploy/maven2/ - - + + + sonatype-nexus-staging + Nexus Staging Repository + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + - - GitHub Issues - https://github.com/joshua-decoder/thrax/issues - + + GitHub Issues + https://github.com/joshua-decoder/thrax/issues + - - - Project Mailing List - dev[at]joshua[dot]incubator[dot]apache[dot]org - - + + + Project Mailing List + dev[at]joshua[dot]incubator[dot]apache[dot]org + + - - Joshua Decoder - https://github.com/joshua-decoder - + + Joshua Decoder + https://github.com/joshua-decoder + - - - jweese - Jonny Weese - jonny[at]cs[dot]jhu[dot]edu - - - jganitkevitch - Juri Ganitkevitch - juri[at]cs[dot]jhu[dot]edu - - - mjpost - Matt Post - post[at]cs[dot]jhu[dot]edu - + + + jweese + Jonny Weese + jonny[at]cs[dot]jhu[dot]edu + + + jganitkevitch + Juri Ganitkevitch + juri[at]cs[dot]jhu[dot]edu + + + mjpost + Matt Post + post[at]cs[dot]jhu[dot]edu + dowobeha Lane Schwartz - - lewismc - Lewis John McGibbney - lewismc[at]apache[dot]org - - + + lewismc + Lewis John McGibbney + lewismc[at]apache[dot]org + + + + + install + target + ${basedir}/target/classes + ${project.artifactId}-${project.version} + ${basedir}/target/test-classes + ${basedir}/src/main/java + ${basedir}/src/test/java + + + src/test/resources/ + + ** + + + + + + + org.apache.maven.plugins + maven-deploy-plugin + ${maven-deploy-plugin.version} + + + org.apache.maven.plugins + maven-release-plugin + ${maven-release-plugin.version} + + forked-path + false + -Prelease + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + 
de.thetaphi + forbiddenapis + [1.8,) + + testCheck + check + + + + + + + + + + + + - - install - target - ${basedir}/target/classes - ${project.artifactId}-${project.version} - ${basedir}/target/test-classes - ${basedir}/src/main/java - ${basedir}/src/test/java - - - src/test/resources/ - - ** - - - - - - - org.apache.maven.plugins - maven-deploy-plugin - ${maven-deploy-plugin.version} - - - org.apache.maven.plugins - maven-release-plugin - ${maven-release-plugin.version} - - forked-path - false - -Prelease - - - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - de.thetaphi - forbiddenapis - [1.8,) - - testCheck - check - - - - - - - - - - - - + + + org.apache.maven.plugins + maven-jar-plugin + ${maven-compiler-plugin.version} + + + + test-jar + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + true + + ${javac.src.version} + ${javac.target.version} + + + + org.apache.maven.plugins + maven-surefire-plugin + ${maven-surfire-plugin.version} + true + + + ${project.basedir}/target/test-data/ + + -Xmx512m + always + false + + + + com.googlecode.maven-java-formatter-plugin + maven-java-formatter-plugin + 0.4 + + ${project.basedir}/doc/eclipse-formatter.xml + + + + de.thetaphi + forbiddenapis + 1.8 + + + true + + false + + jdk-unsafe + jdk-deprecated + jdk-system-out + + + + + + + check + testCheck + + + + + + - - - org.apache.maven.plugins - maven-jar-plugin - ${maven-compiler-plugin.version} - - - - test-jar - - - - - - org.apache.maven.plugins - maven-compiler-plugin - ${maven-compiler-plugin.version} - true - - ${javac.src.version} - ${javac.target.version} - - - - org.apache.maven.plugins - maven-surefire-plugin - ${maven-surfire-plugin.version} - true - - - ${project.basedir}/target/test-data/ - - -Xmx512m - always - false - - - - com.googlecode.maven-java-formatter-plugin - maven-java-formatter-plugin - 0.4 - - ${project.basedir}/doc/eclipse-formatter.xml - - - - de.thetaphi - forbiddenapis - 1.8 + + + 
release + + + + org.apache.maven.plugins + maven-source-plugin + ${maven-source-plugin.version} + + + attach-sources + + jar-no-fork + - - true - - false - - jdk-unsafe - jdk-deprecated - jdk-system-out - - + + + true + true + + + ${implementation.build} + ${maven.build.timestamp} + ${javac.src.version} + ${javac.target.version} + + - - - - check - testCheck - - - - - - - - - - release - - - - org.apache.maven.plugins - maven-source-plugin - ${maven-source-plugin.version} - - - attach-sources - - jar-no-fork - - - - - true - true - - - ${implementation.build} - ${maven.build.timestamp} - ${javac.src.version} - ${javac.target.version} - - - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - ${maven-javadoc-plugin.version} - - - attach-javadocs - - jar - - - true - - - true - true - - - ${implementation.build} - ${maven.build.timestamp} - ${javac.src.version} - ${javac.target.version} - - - - - - - - org.apache.maven.plugins - maven-gpg-plugin - ${maven-gpg-plugin.version} - - - sign-artifacts - verify - - sign - - - - - - net.ju-n.maven.plugins - checksum-maven-plugin - ${checksum-maven-plugin.version} - - - - - + + + + + org.apache.maven.plugins + maven-javadoc-plugin + ${maven-javadoc-plugin.version} + + + attach-javadocs + + jar + + + true + + + true + true + + + ${implementation.build} + ${maven.build.timestamp} + ${javac.src.version} + ${javac.target.version} + + + + + + + + org.apache.maven.plugins + maven-gpg-plugin + ${maven-gpg-plugin.version} + + + sign-artifacts + verify + + sign + + + + + + net.ju-n.maven.plugins + checksum-maven-plugin + ${checksum-maven-plugin.version} + + + + + - - - 2.3.2 - 2.5 - 2.4 - 2.12 - 2.5.1 - 2.1.2 - 2.9.1 - 1.4 - 0.8 - 2.2.2 - 2.5 - 1.0.1 + + + 2.3.2 + 2.5 + 2.4 + 2.12 + 2.5.1 + 2.1.2 + 2.9.1 + 1.4 + 0.8 + 2.2.2 + 2.5 + 1.0.1 - - ${scmBranch}@r${buildNumber} - 1.7 - 1.7 - 1.7 - yyyy-MM-dd HH:mm:ssZ - false - ${project.build.finalName} - + + ${scmBranch}@r${buildNumber} + 1.7 + 1.7 + 1.7 + yyyy-MM-dd HH:mm:ssZ + 
false + ${project.build.finalName} + - + - + com.amazonaws aws-java-sdk @@ -368,5 +367,5 @@ jdk15 test - + From 3ec03ef2c3d16d5ede148b54412fdb57c4c2d1e6 Mon Sep 17 00:00:00 2001 From: Lewis John McGibbney Date: Mon, 16 May 2016 12:26:38 -0700 Subject: [PATCH 3/6] THRAX-11 Publish thrax to Sonatype --- .../distributional/ContextWritable.java | 2 +- .../annotation/AnnotationFeatureJob.java | 2 +- .../features/mapred/MapReduceFeature.java | 2 +- .../DistributionalContextExtractionJob.java | 2 +- .../jobs/DistributionalContextSortingJob.java | 2 +- .../jhu/thrax/hadoop/jobs/ExtractionJob.java | 2 +- .../hadoop/jobs/FeatureCollectionJob.java | 2 +- .../edu/jhu/thrax/hadoop/jobs/OutputJob.java | 2 +- .../hadoop/jobs/ParaphraseAggregationJob.java | 2 +- .../hadoop/jobs/ParaphrasePivotingJob.java | 2 +- .../jhu/thrax/hadoop/jobs/VocabularyJob.java | 2 +- .../jhu/thrax/hadoop/jobs/WordLexprobJob.java | 2 +- .../thrax/hadoop/tools/ExtractionTool.java | 2 +- .../jhu/thrax/hadoop/tools/FeatureTool.java | 2 +- .../jhu/thrax/hadoop/tools/OutputTool.java | 2 +- ...rceWordGivenTargetWordProbabilityTool.java | 2 +- ...getWordGivenSourceWordProbabilityTool.java | 2 +- .../lexprob/SequenceFileLexprobTable.java | 9 ++-- .../jhu/thrax/tools/SequenceToGrammar.java | 5 +- .../jhu/thrax/tools/SequenceToSignatures.java | 10 ++-- .../jhu/thrax/util/SequenceFileCreator.java | 51 ++++++++++--------- .../edu/jhu/thrax/util/TestSetFilter.java | 3 ++ .../java/edu/jhu/thrax/util/Vocabulary.java | 4 +- .../edu/jhu/thrax/util/io/LineReader.java | 1 - 24 files changed, 63 insertions(+), 54 deletions(-) diff --git a/src/main/java/edu/jhu/thrax/hadoop/distributional/ContextWritable.java b/src/main/java/edu/jhu/thrax/hadoop/distributional/ContextWritable.java index ed007fc..93edf68 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/distributional/ContextWritable.java +++ b/src/main/java/edu/jhu/thrax/hadoop/distributional/ContextWritable.java @@ -88,7 +88,7 @@ private void mergeSums(ContextWritable that, 
SLSH slsh) { // TODO: probably needs deep copy. this_signature.sums = sums; that_signature.sums = sums; - slsh.updateSignature(this_signature, that_signature); + slsh.update(this_signature.toString(), that_signature); } public void compact(SLSH slsh) { diff --git a/src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureJob.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureJob.java index fd7f9c2..a780299 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureJob.java +++ b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/AnnotationFeatureJob.java @@ -48,7 +48,7 @@ public String getOutputSuffix() { public Job getJob(Configuration conf) throws IOException { String name = getName(); - Job job = new Job(conf, name); + Job job = Job.getInstance(conf, name); job.setJarByClass(this.getClass()); job.setMapperClass(Mapper.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java index 25ffd3d..fafdecb 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java +++ b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java @@ -48,7 +48,7 @@ public Class combinerClass() { public Job getJob(Configuration conf) throws IOException { String name = getName(); - Job job = new Job(conf, name); + Job job = Job.getInstance(conf, name); job.setJarByClass(this.getClass()); job.setMapperClass(this.mapperClass()); diff --git a/src/main/java/edu/jhu/thrax/hadoop/jobs/DistributionalContextExtractionJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/DistributionalContextExtractionJob.java index bb5a78b..5186706 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/jobs/DistributionalContextExtractionJob.java +++ b/src/main/java/edu/jhu/thrax/hadoop/jobs/DistributionalContextExtractionJob.java @@ -22,7 +22,7 @@ public class DistributionalContextExtractionJob implements 
ThraxJob { public Job getJob(Configuration conf) throws IOException { - Job job = new Job(conf, "distributional"); + Job job = Job.getInstance(conf, "distributional"); job.setJarByClass(DistributionalContextMapper.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/jobs/DistributionalContextSortingJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/DistributionalContextSortingJob.java index 89068c3..4698da4 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/jobs/DistributionalContextSortingJob.java +++ b/src/main/java/edu/jhu/thrax/hadoop/jobs/DistributionalContextSortingJob.java @@ -24,7 +24,7 @@ public class DistributionalContextSortingJob implements ThraxJob { new HashSet>(); public Job getJob(Configuration conf) throws IOException { - Job job = new Job(conf, "sorting"); + Job job = Job.getInstance(conf, "sorting"); job.setJarByClass(DistributionalContextMapper.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/jobs/ExtractionJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/ExtractionJob.java index 6ef30aa..a4f3cce 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/jobs/ExtractionJob.java +++ b/src/main/java/edu/jhu/thrax/hadoop/jobs/ExtractionJob.java @@ -27,7 +27,7 @@ public Set> getPrerequisites() { } public Job getJob(Configuration conf) throws IOException { - Job job = new Job(conf, "extraction"); + Job job = Job.getInstance(conf, "extraction"); job.setJarByClass(ExtractionMapper.class); job.setMapperClass(ExtractionMapper.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java index 9657fea..b5e971e 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java +++ b/src/main/java/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java @@ -42,7 +42,7 @@ public Set> getPrerequisites() { } public Job getJob(Configuration conf) throws IOException { - Job job = new Job(conf, "collect"); + Job job = Job.getInstance(conf, "collect"); String workDir 
= conf.get("thrax.work-dir"); diff --git a/src/main/java/edu/jhu/thrax/hadoop/jobs/OutputJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/OutputJob.java index 94d2111..3954ea6 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/jobs/OutputJob.java +++ b/src/main/java/edu/jhu/thrax/hadoop/jobs/OutputJob.java @@ -39,7 +39,7 @@ public static void addPrerequisite(Class c) { } public Job getJob(Configuration conf) throws IOException { - Job job = new Job(conf, "collect"); + Job job = Job.getInstance(conf, "collect"); String workDir = conf.get("thrax.work-dir"); job.setJarByClass(OutputReducer.class); job.setMapperClass(Mapper.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/jobs/ParaphraseAggregationJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/ParaphraseAggregationJob.java index 58e1300..e734b58 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/jobs/ParaphraseAggregationJob.java +++ b/src/main/java/edu/jhu/thrax/hadoop/jobs/ParaphraseAggregationJob.java @@ -26,7 +26,7 @@ public class ParaphraseAggregationJob implements ThraxJob { new HashSet>(); public Job getJob(Configuration conf) throws IOException { - Job job = new Job(conf, "aggregate"); + Job job = Job.getInstance(conf, "aggregate"); job.setJarByClass(AggregationReducer.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/jobs/ParaphrasePivotingJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/ParaphrasePivotingJob.java index ce90129..7f004d6 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/jobs/ParaphrasePivotingJob.java +++ b/src/main/java/edu/jhu/thrax/hadoop/jobs/ParaphrasePivotingJob.java @@ -32,7 +32,7 @@ public Set> getPrerequisites() { } public Job getJob(Configuration conf) throws IOException { - Job job = new Job(conf, "pivoting"); + Job job = Job.getInstance(conf, "pivoting"); job.setJarByClass(PivotingReducer.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/jobs/VocabularyJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/VocabularyJob.java index fee3d02..76945d5 100644 --- 
a/src/main/java/edu/jhu/thrax/hadoop/jobs/VocabularyJob.java +++ b/src/main/java/edu/jhu/thrax/hadoop/jobs/VocabularyJob.java @@ -28,7 +28,7 @@ public class VocabularyJob implements ThraxJob { public VocabularyJob() {} public Job getJob(Configuration conf) throws IOException { - Job job = new Job(conf, "vocabulary"); + Job job = Job.getInstance(conf, "vocabulary"); job.setJarByClass(VocabularyJob.class); job.setMapperClass(VocabularyJob.Map.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/jobs/WordLexprobJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/WordLexprobJob.java index ccb135b..0ae51f4 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/jobs/WordLexprobJob.java +++ b/src/main/java/edu/jhu/thrax/hadoop/jobs/WordLexprobJob.java @@ -33,7 +33,7 @@ public Set> getPrerequisites() { public Job getJob(Configuration conf) throws IOException { Configuration theConf = new Configuration(conf); theConf.setBoolean(SOURCE_GIVEN_TARGET, isSourceGivenTarget); - Job job = new Job(theConf, getName()); + Job job = Job.getInstance(theConf, getName()); job.setJarByClass(WordLexicalProbabilityCalculator.class); job.setMapperClass(WordLexicalProbabilityCalculator.Map.class); job.setCombinerClass(IntSumReducer.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/tools/ExtractionTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/ExtractionTool.java index dd8a73b..baefe8f 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/tools/ExtractionTool.java +++ b/src/main/java/edu/jhu/thrax/hadoop/tools/ExtractionTool.java @@ -44,7 +44,7 @@ public int run(String [] argv) throws Exception return 1; } - Job job = new Job(conf, "thrax"); + Job job = Job.getInstance(conf, "thrax"); job.setJarByClass(ExtractionMapper.class); job.setMapperClass(ExtractionMapper.class); job.setCombinerClass(IntSumReducer.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/tools/FeatureTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/FeatureTool.java index 466d5e0..07d0ec0 100644 --- 
a/src/main/java/edu/jhu/thrax/hadoop/tools/FeatureTool.java +++ b/src/main/java/edu/jhu/thrax/hadoop/tools/FeatureTool.java @@ -48,7 +48,7 @@ public int run(String [] argv) throws Exception workDir += Path.SEPARATOR; conf.set("thrax.work-dir", workDir); } - Job job = new Job(conf, String.format("thrax-%s", featureName)); + Job job = Job.getInstance(conf, String.format("thrax-%s", featureName)); job.setJarByClass(f.getClass()); job.setMapperClass(f.mapperClass()); diff --git a/src/main/java/edu/jhu/thrax/hadoop/tools/OutputTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/OutputTool.java index 2f85a16..c357e54 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/tools/OutputTool.java +++ b/src/main/java/edu/jhu/thrax/hadoop/tools/OutputTool.java @@ -45,7 +45,7 @@ public int run(String [] argv) throws Exception workDir += Path.SEPARATOR; conf.set("thrax.work-dir", workDir); } - Job job = new Job(conf, "thrax-collect"); + Job job = Job.getInstance(conf, "thrax-collect"); job.setJarByClass(OutputReducer.class); job.setMapperClass(Mapper.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java index 32c2148..24217cf 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java +++ b/src/main/java/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java @@ -49,7 +49,7 @@ public int run(String [] argv) throws Exception conf.set("thrax.work-dir", workDir); } conf.setBoolean(WordLexprobJob.SOURCE_GIVEN_TARGET, true); - Job job = new Job(conf, "thrax-sgt-word-lexprob"); + Job job = Job.getInstance(conf, "thrax-sgt-word-lexprob"); job.setJarByClass(WordLexicalProbabilityCalculator.class); job.setMapperClass(WordLexicalProbabilityCalculator.Map.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java 
b/src/main/java/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java index e40bcf8..e3215bf 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java +++ b/src/main/java/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java @@ -49,7 +49,7 @@ public int run(String [] argv) throws Exception conf.set("thrax.work-dir", workDir); } conf.setBoolean(WordLexprobJob.SOURCE_GIVEN_TARGET, false); - Job job = new Job(conf, "thrax-tgs-word-lexprob"); + Job job = Job.getInstance(conf, "thrax-tgs-word-lexprob"); job.setJarByClass(WordLexicalProbabilityCalculator.class); job.setMapperClass(WordLexicalProbabilityCalculator.Map.class); diff --git a/src/main/java/edu/jhu/thrax/lexprob/SequenceFileLexprobTable.java b/src/main/java/edu/jhu/thrax/lexprob/SequenceFileLexprobTable.java index 65cbd14..d72392c 100644 --- a/src/main/java/edu/jhu/thrax/lexprob/SequenceFileLexprobTable.java +++ b/src/main/java/edu/jhu/thrax/lexprob/SequenceFileLexprobTable.java @@ -11,6 +11,7 @@ import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.SequenceFile.Reader.Option; /** * A base class for lexical probability tables that will be read from a Hadoop sequence file that is @@ -52,8 +53,6 @@ protected static Iterable getSequenceFileIterator(FileSystem theFS, final FloatWritable d = new FloatWritable(0.0f); final FileStatus[] theFiles = files; final Configuration theConf = conf; - final FileSystem fs = theFS; - final Iterator iterator = new Iterator() { int fileIndex = 0; TableEntry lookahead = null; @@ -66,7 +65,8 @@ public boolean hasNext() { if (lookahead != null) return true; // if the reader is null, we haven't looked at a single // file yet, so set the reader to read the first file - if (reader == null) reader = new SequenceFile.Reader(fs, theFiles[0].getPath(), theConf); + Option fFile = 
SequenceFile.Reader.file(theFiles[0].getPath()); + if (reader == null) reader = new SequenceFile.Reader(theConf, fFile); // reader is not null here, so try to read an entry boolean gotNew = reader.next(pair, d); if (gotNew) { @@ -79,7 +79,8 @@ public boolean hasNext() { // but if there are no more, return false if (fileIndex >= theFiles.length) return false; reader.close(); - reader = new SequenceFile.Reader(fs, theFiles[fileIndex].getPath(), theConf); + Option file = SequenceFile.Reader.file(theFiles[fileIndex].getPath()); + reader = new SequenceFile.Reader(theConf, file); // new file, so try again gotNew = reader.next(pair, d); if (gotNew) { diff --git a/src/main/java/edu/jhu/thrax/tools/SequenceToGrammar.java b/src/main/java/edu/jhu/thrax/tools/SequenceToGrammar.java index 94293a6..b6b5e98 100644 --- a/src/main/java/edu/jhu/thrax/tools/SequenceToGrammar.java +++ b/src/main/java/edu/jhu/thrax/tools/SequenceToGrammar.java @@ -4,10 +4,10 @@ import java.util.logging.Logger; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.SequenceFile.Reader.Option; import edu.jhu.jerboa.util.FileManager; @@ -51,7 +51,8 @@ public static void main(String[] args) throws Exception { Text rule_string = new Text(); Configuration config = new Configuration(); Path path = new Path(input_file); - SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.getLocal(config), path, config); + Option fFile = SequenceFile.Reader.file(path); + SequenceFile.Reader reader = new SequenceFile.Reader(config, fFile); BufferedWriter grammar_writer = FileManager.getWriter(output_file); long rule_count = 0; diff --git a/src/main/java/edu/jhu/thrax/tools/SequenceToSignatures.java b/src/main/java/edu/jhu/thrax/tools/SequenceToSignatures.java index 40d97a6..805b53a 100644 --- 
a/src/main/java/edu/jhu/thrax/tools/SequenceToSignatures.java +++ b/src/main/java/edu/jhu/thrax/tools/SequenceToSignatures.java @@ -4,13 +4,12 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.ObjectOutputStream; -import java.net.URI; import java.util.logging.Logger; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.SequenceFile.Reader.Option; import edu.jhu.jerboa.util.FileManager; import edu.jhu.thrax.hadoop.distributional.SignatureWritable; @@ -75,13 +74,14 @@ public static void main(String[] args) throws Exception { SequenceFile.Reader reader; if (local) { Path path = new Path(input_file); - reader = new SequenceFile.Reader(FileSystem.getLocal(config), path, config); + Option fFile = SequenceFile.Reader.file(path); + reader = new SequenceFile.Reader(config, fFile); } else { // TODO: Only works for completely specified URLs (i.e. hdfs://name-node/...), currently // disabled until I figure out how to get simple paths to work in HDFS. 
- FileSystem file_system = FileSystem.get(URI.create(input_file), config); Path path = new Path(input_file); - reader = new SequenceFile.Reader(file_system, path, config); + Option fFile = SequenceFile.Reader.file(path); + reader = new SequenceFile.Reader(config, fFile); } int chunk_id = 0; diff --git a/src/main/java/edu/jhu/thrax/util/SequenceFileCreator.java b/src/main/java/edu/jhu/thrax/util/SequenceFileCreator.java index c895b2b..6698b53 100644 --- a/src/main/java/edu/jhu/thrax/util/SequenceFileCreator.java +++ b/src/main/java/edu/jhu/thrax/util/SequenceFileCreator.java @@ -7,34 +7,37 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.MapFile; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; - +import org.apache.hadoop.io.SequenceFile.Writer.Option; public class SequenceFileCreator { - public static void main(String [] argv) throws Exception - { - LongWritable k = new LongWritable(); - Text v = new Text(); + public static void main(String [] argv) throws Exception { + LongWritable k = new LongWritable(); + Text v = new Text(); - URI uri = URI.create(argv[0]); - Configuration conf = new Configuration(); - FileSystem fs = FileSystem.get(uri, conf); - Path path = new Path(argv[0]); - SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, LongWritable.class, Text.class); + URI uri = URI.create(argv[0]); + Configuration conf = new Configuration(); + @SuppressWarnings("unused") + FileSystem fs = FileSystem.get(uri, conf); + Path path = new Path(argv[0]); + Option fileOption = SequenceFile.Writer.file(path); + Option keyClassOpt = SequenceFile.Writer.keyClass(LongWritable.class); /* key class must match k (LongWritable) passed to append(), as in the replaced createWriter call */ + org.apache.hadoop.io.SequenceFile.Writer.Option valClassOpt = SequenceFile.Writer.valueClass(Text.class); /* value class must match v (Text) passed to append() */ + SequenceFile.Writer writer = SequenceFile.createWriter(conf, fileOption, keyClassOpt, valClassOpt); - long current = 0; - Scanner scanner = new
Scanner(System.in, "UTF-8"); - while (scanner.hasNextLine()) { - String line = scanner.nextLine(); - k.set(current); - v.set(line); - writer.append(k, v); - current++; - } - scanner.close(); - writer.close(); - return; - } + long current = 0; + Scanner scanner = new Scanner(System.in, "UTF-8"); + while (scanner.hasNextLine()) { + String line = scanner.nextLine(); + k.set(current); + v.set(line); + writer.append(k, v); + current++; + } + scanner.close(); + writer.close(); + return; + } } - diff --git a/src/main/java/edu/jhu/thrax/util/TestSetFilter.java b/src/main/java/edu/jhu/thrax/util/TestSetFilter.java index 0488d9a..fa60737 100644 --- a/src/main/java/edu/jhu/thrax/util/TestSetFilter.java +++ b/src/main/java/edu/jhu/thrax/util/TestSetFilter.java @@ -60,6 +60,7 @@ public void setRuleLength(int value) { private void getTestSentences(String filename) { try { + @SuppressWarnings("resource") Scanner scanner = new Scanner(new File(filename), "UTF-8"); while (scanner.hasNextLine()) { String line = scanner.nextLine(); @@ -114,6 +115,7 @@ public void filterGrammarToFile(String fullGrammarFile, String sentence, setSentence(sentence); try { + @SuppressWarnings("resource") Scanner scanner = new Scanner(new GZIPInputStream(new FileInputStream(fullGrammarFile)), "UTF-8"); int rulesIn = 0; @@ -336,6 +338,7 @@ public static void main(String[] argv) { filter.getTestSentences(argv[i]); } + @SuppressWarnings("resource") Scanner scanner = new Scanner(System.in, "UTF-8"); int rulesIn = 0; int rulesOut = 0; diff --git a/src/main/java/edu/jhu/thrax/util/Vocabulary.java b/src/main/java/edu/jhu/thrax/util/Vocabulary.java index a607b09..f2a6170 100644 --- a/src/main/java/edu/jhu/thrax/util/Vocabulary.java +++ b/src/main/java/edu/jhu/thrax/util/Vocabulary.java @@ -22,6 +22,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.SequenceFile.Reader.Option; import org.apache.hadoop.io.Text; import 
edu.jhu.thrax.hadoop.features.SimpleFeature; @@ -165,7 +166,8 @@ public static boolean initialize(Configuration conf, String file_glob) throws IO initialize(conf); for (FileStatus file : files) { - SequenceFile.Reader reader = new SequenceFile.Reader(file_system, file.getPath(), conf); + Option fileOption = SequenceFile.Reader.file(file.getPath()); + SequenceFile.Reader reader = new SequenceFile.Reader(conf, fileOption); Text h_token = new Text(); IntWritable h_id = new IntWritable(); while (reader.next(h_id, h_token)) { diff --git a/src/main/java/edu/jhu/thrax/util/io/LineReader.java b/src/main/java/edu/jhu/thrax/util/io/LineReader.java index 7e020ae..77d6f50 100644 --- a/src/main/java/edu/jhu/thrax/util/io/LineReader.java +++ b/src/main/java/edu/jhu/thrax/util/io/LineReader.java @@ -90,7 +90,6 @@ public LineReader(BufferedReader reader) { * InputStream. This method is considered a hack which should be removed * once a better solution presents itself. */ - @SuppressWarnings("resource") @Deprecated public static final InputStream getInputStream(String filename) throws IOException { FileInputStream fis = new FileInputStream(filename); From 61526704fe03800eb55b45ce025a545c2c8d7fc2 Mon Sep 17 00:00:00 2001 From: Tobias Domhan Date: Wed, 9 Dec 2015 17:53:18 +0100 Subject: [PATCH 4/6] Good-Turing smoothed phrase probabilities as a new feature. 
--- .../CountOfRuleCountsEstimationJob.java | 131 ++++++++++++++++++ ...moothedSourcePhraseGivenTargetFeature.java | 118 ++++++++++++++++ ...moothedTargetPhraseGivenSourceFeature.java | 118 ++++++++++++++++ .../features/mapred/MapReduceFeature.java | 3 +- .../mapred/MapReduceFeatureFactory.java | 4 + .../SourcePhraseGivenTargetFeature.java | 4 +- .../TargetPhraseGivenSourceFeature.java | 4 +- .../mapred/coc/CountOfCountsEstimator.java | 118 ++++++++++++++++ .../mapred/coc/GoodTuringSmoother.java | 15 ++ src/edu/jhu/thrax/util/HdfsUtils.java | 49 +++++++ .../coc/CountOfCountsEstimatorTest.java | 39 ++++++ 11 files changed, 598 insertions(+), 5 deletions(-) create mode 100644 src/edu/jhu/thrax/hadoop/features/mapred/CountOfRuleCountsEstimationJob.java create mode 100644 src/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedSourcePhraseGivenTargetFeature.java create mode 100644 src/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedTargetPhraseGivenSourceFeature.java create mode 100644 src/edu/jhu/thrax/hadoop/features/mapred/coc/CountOfCountsEstimator.java create mode 100644 src/edu/jhu/thrax/hadoop/features/mapred/coc/GoodTuringSmoother.java create mode 100644 src/edu/jhu/thrax/util/HdfsUtils.java create mode 100644 test/edu/jhu/thrax/hadoop/features/mapred/coc/CountOfCountsEstimatorTest.java diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/CountOfRuleCountsEstimationJob.java b/src/edu/jhu/thrax/hadoop/features/mapred/CountOfRuleCountsEstimationJob.java new file mode 100644 index 0000000..c54d1fb --- /dev/null +++ b/src/edu/jhu/thrax/hadoop/features/mapred/CountOfRuleCountsEstimationJob.java @@ -0,0 +1,131 @@ +package edu.jhu.thrax.hadoop.features.mapred; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.mapreduce.Job; 
+import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; +import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer; + +import edu.jhu.thrax.hadoop.datatypes.Annotation; +import edu.jhu.thrax.hadoop.datatypes.RuleWritable; +import edu.jhu.thrax.hadoop.features.mapred.coc.CountOfCountsEstimator; +import edu.jhu.thrax.hadoop.jobs.ExtractionJob; +import edu.jhu.thrax.hadoop.jobs.ThraxJob; +import edu.jhu.thrax.util.HdfsUtils; + +@SuppressWarnings("rawtypes") +public class CountOfRuleCountsEstimationJob implements ThraxJob { + + //single reducer, as this is where we carry out the regression for which we need all data in a central location + private static final int SINGLE_REDUCER = 1; + + public static final String NAME = "rule_count_of_counts"; + + public static final String COUNT_OF_COUNT_ESTIMATOR_OUTPUT_PATH = "count-of-counts-estimator"; + + public String getName() { + return NAME; + } + + @Override + public String getOutputSuffix() { + return getName(); + } + + @Override + public Set> getPrerequisites() { + Set> result = new HashSet>(); + result.add(ExtractionJob.class); + return result; + } + + @Override + public Job getJob(Configuration conf) throws IOException { + String name = getName(); + Job job = new Job(conf, name); + job.setJarByClass(this.getClass()); + + job.setMapperClass(this.mapperClass()); + job.setCombinerClass(IntSumReducer.class); + job.setReducerClass(CountOfCountsRegressionReducer.class); + + job.setInputFormatClass(SequenceFileInputFormat.class); + job.setMapOutputKeyClass(IntWritable.class); + job.setMapOutputValueClass(IntWritable.class); + + job.setOutputFormatClass(SequenceFileOutputFormat.class); + 
job.setOutputKeyClass(IntWritable.class); + job.setOutputValueClass(IntWritable.class); + + int num_reducers = SINGLE_REDUCER; /* deliberately ignores "thrax.reducers": the regression in cleanup() needs every count in one reducer, and each extra reducer would write its own partial estimator to the same path */ + job.setNumReduceTasks(num_reducers); + + FileInputFormat.setInputPaths(job, new Path(conf.get("thrax.work-dir") + "rules")); + FileOutputFormat.setOutputPath(job, new Path(conf.get("thrax.work-dir") + name)); + return job; + } + + public Class mapperClass() { + return CustomMap.class; + } + + private static class CustomMap extends Mapper { + + private final static IntWritable ONE = new IntWritable(1); + + protected void map(RuleWritable key, Annotation value, Context context) throws IOException, + InterruptedException { + IntWritable count = new IntWritable(value.count()); + context.write(count, ONE); + } + } + + /** + * Writes counts of counts and produces a linear regression of the log-log plot of the data. + */ + private static class CountOfCountsRegressionReducer extends IntSumReducer { + + private Map countOfCounts = new HashMap(); + + public void reduce(IntWritable key, Iterable values, Context context) + throws IOException, InterruptedException { + if (countOfCounts.containsKey(key.get())) { + throw new RuntimeException( + String.format("Duplicate key %d in counts of counts.", key.get())); + } + + int sum = 0; + for (IntWritable val : values) { + sum += val.get(); + } + countOfCounts.put(key.get(), sum); + } + + @Override + protected void cleanup( + Reducer.Context context) + throws IOException, InterruptedException { + CountOfCountsEstimator estimator = CountOfCountsEstimator.regress(countOfCounts); + System.err.println(String.format( + "Created CountOfCountsEstimator with slope %f and intercept %f", + estimator.getSlope(), estimator.getIntercept())); + + Configuration conf = context.getConfiguration(); + Path outPath = new Path(conf.getRaw("thrax.work-dir"), COUNT_OF_COUNT_ESTIMATOR_OUTPUT_PATH); + + HdfsUtils.writeObjectToFs(conf, estimator, outPath); + } + } +} diff --git
a/src/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedSourcePhraseGivenTargetFeature.java b/src/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedSourcePhraseGivenTargetFeature.java new file mode 100644 index 0000000..610b0d7 --- /dev/null +++ b/src/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedSourcePhraseGivenTargetFeature.java @@ -0,0 +1,118 @@ +package edu.jhu.thrax.hadoop.features.mapred; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparator; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Partitioner; +import org.apache.hadoop.mapreduce.Reducer; + +import edu.jhu.thrax.hadoop.comparators.PrimitiveArrayMarginalComparator; +import edu.jhu.thrax.hadoop.datatypes.FeaturePair; +import edu.jhu.thrax.hadoop.datatypes.PrimitiveUtils; +import edu.jhu.thrax.hadoop.datatypes.RuleWritable; +import edu.jhu.thrax.hadoop.features.mapred.coc.CountOfCountsEstimator; +import edu.jhu.thrax.hadoop.features.mapred.coc.GoodTuringSmoother; +import edu.jhu.thrax.hadoop.jobs.ThraxJob; +import edu.jhu.thrax.util.HdfsUtils; +import edu.jhu.thrax.util.Vocabulary; + +@SuppressWarnings("rawtypes") +public class GoodTuringSmoothedSourcePhraseGivenTargetFeature extends MapReduceFeature { + + public static final String NAME = "f_given_e_phrase_gt_smoothed"; + public static final String LABEL = "p_gt(f|e)"; + + public String getName() { + return NAME; + } + + public String getLabel() { + return LABEL; + } + + @Override + public Set> getPrerequisites() { + Set> parentPrerequisites = super.getPrerequisites(); + Set> prerequisites = new HashSet>(parentPrerequisites); /* copy the inherited prerequisites instead of only sizing by them, so they are kept alongside the job added next */ + prerequisites.add(CountOfRuleCountsEstimationJob.class); +
return prerequisites; + } + + public Class sortComparatorClass() { + return SourcePhraseGivenTargetFeature.Comparator.class; + } + + public Class partitionerClass() { + return RuleWritable.TargetPartitioner.class; + } + + public Class mapperClass() { + return SourcePhraseGivenTargetFeature.Map.class; + } + + public Class reducerClass() { + return Reduce.class; + } + + private static class Reduce extends Reducer { + private int marginal; + private FloatWritable prob; + + private GoodTuringSmoother goodTuringSmoother; + + protected void setup(Context context) throws IOException, InterruptedException { + Configuration conf = context.getConfiguration(); + String vocabulary_path = conf.getRaw("thrax.work-dir") + "vocabulary/part-*"; + Vocabulary.initialize(conf, vocabulary_path); + + Path inPath = new Path(conf.getRaw("thrax.work-dir"), + CountOfRuleCountsEstimationJob.COUNT_OF_COUNT_ESTIMATOR_OUTPUT_PATH); + try { + goodTuringSmoother = new GoodTuringSmoother(HdfsUtils.readObjectFromFs(conf, inPath)); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + protected void reduce(RuleWritable key, Iterable values, Context context) + throws IOException, InterruptedException { + if (Arrays.equals(key.source, PrimitiveArrayMarginalComparator.MARGINAL)) { + marginal = 0; + for (IntWritable x : values) + marginal += x.get(); + return; + } + if (key.lhs == PrimitiveUtils.MARGINAL_ID) { + int count = 0; + for (IntWritable x : values) + count += x.get(); + + double smoothedCount = goodTuringSmoother.smoothedCount(count); + + prob = new FloatWritable((float) -Math.log(smoothedCount / (float) marginal)); + return; + } + context.write(key, new FeaturePair(Vocabulary.id(LABEL), prob)); + } + + } + + private static final FloatWritable ZERO = new FloatWritable(0.0f); + + public void unaryGlueRuleScore(int nt, java.util.Map map) { + map.put(Vocabulary.id(LABEL), ZERO); + } + + public void binaryGlueRuleScore(int nt, java.util.Map map) { + 
map.put(Vocabulary.id(LABEL), ZERO); + } +} diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedTargetPhraseGivenSourceFeature.java b/src/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedTargetPhraseGivenSourceFeature.java new file mode 100644 index 0000000..ede8b69 --- /dev/null +++ b/src/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedTargetPhraseGivenSourceFeature.java @@ -0,0 +1,118 @@ +package edu.jhu.thrax.hadoop.features.mapred; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparator; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Partitioner; +import org.apache.hadoop.mapreduce.Reducer; + +import edu.jhu.thrax.hadoop.comparators.PrimitiveArrayMarginalComparator; +import edu.jhu.thrax.hadoop.datatypes.FeaturePair; +import edu.jhu.thrax.hadoop.datatypes.PrimitiveUtils; +import edu.jhu.thrax.hadoop.datatypes.RuleWritable; +import edu.jhu.thrax.hadoop.features.mapred.coc.CountOfCountsEstimator; +import edu.jhu.thrax.hadoop.features.mapred.coc.GoodTuringSmoother; +import edu.jhu.thrax.hadoop.jobs.ThraxJob; +import edu.jhu.thrax.util.HdfsUtils; +import edu.jhu.thrax.util.Vocabulary; + +@SuppressWarnings("rawtypes") +public class GoodTuringSmoothedTargetPhraseGivenSourceFeature extends MapReduceFeature { + + public static final String NAME = "e_given_f_phrase_gt_smoothed"; + public static final String LABEL = "p_gt(e|f)"; + + public String getName() { + return NAME; + } + + public String getLabel() { + return LABEL; + } + + @Override + public Set> getPrerequisites() { + Set> parentPrerequisites = super.getPrerequisites(); + Set> prerequisites = new HashSet>(parentPrerequisites); /* copy the inherited prerequisites instead of only sizing by them, so they are kept alongside the job added next */ +
prerequisites.add(CountOfRuleCountsEstimationJob.class); + return prerequisites; + } + + public Class sortComparatorClass() { + return TargetPhraseGivenSourceFeature.Comparator.class; + } + + public Class partitionerClass() { + return RuleWritable.SourcePartitioner.class; + } + + public Class mapperClass() { + return TargetPhraseGivenSourceFeature.Map.class; + } + + public Class reducerClass() { + return Reduce.class; + } + + private static class Reduce extends Reducer { + private int marginal; + private FloatWritable prob; + + private GoodTuringSmoother goodTuringSmoother; + + protected void setup(Context context) throws IOException, InterruptedException { + Configuration conf = context.getConfiguration(); + String vocabulary_path = conf.getRaw("thrax.work-dir") + "vocabulary/part-*"; + Vocabulary.initialize(conf, vocabulary_path); + + Path inPath = new Path(conf.getRaw("thrax.work-dir"), + CountOfRuleCountsEstimationJob.COUNT_OF_COUNT_ESTIMATOR_OUTPUT_PATH); + try { + goodTuringSmoother = new GoodTuringSmoother(HdfsUtils.readObjectFromFs(conf, inPath)); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + protected void reduce(RuleWritable key, Iterable values, Context context) + throws IOException, InterruptedException { + if (Arrays.equals(key.target, PrimitiveArrayMarginalComparator.MARGINAL)) { + marginal = 0; + for (IntWritable x : values) + marginal += x.get(); + return; + } + if (key.lhs == PrimitiveUtils.MARGINAL_ID) { + int count = 0; + for (IntWritable x : values) + count += x.get(); + + double smoothedCount = goodTuringSmoother.smoothedCount(count); + + prob = new FloatWritable((float) -Math.log(smoothedCount / (float) marginal)); + return; + } + context.write(key, new FeaturePair(Vocabulary.id(LABEL), prob)); + } + + } + + private static final FloatWritable ZERO = new FloatWritable(0.0f); + + public void unaryGlueRuleScore(int nt, java.util.Map map) { + map.put(Vocabulary.id(LABEL), ZERO); + } + + public void 
binaryGlueRuleScore(int nt, java.util.Map map) { + map.put(Vocabulary.id(LABEL), ZERO); + } +} diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java b/src/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java index 25ffd3d..16feac5 100644 --- a/src/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java +++ b/src/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeature.java @@ -64,7 +64,8 @@ public Job getJob(Configuration conf) throws IOException { setMapOutputFormat(job); - int num_reducers = conf.getInt("thrax.reducers", conf.getInt("mapreduce.job.reduces", DefaultValues.DEFAULT_NUM_REDUCERS)); + int num_reducers = conf.getInt("thrax.reducers", + conf.getInt("mapreduce.job.reduces", DefaultValues.DEFAULT_NUM_REDUCERS)); job.setNumReduceTasks(num_reducers); FileInputFormat.setInputPaths(job, new Path(conf.get("thrax.work-dir") + "rules")); diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeatureFactory.java b/src/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeatureFactory.java index f64536e..d6d0720 100644 --- a/src/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeatureFactory.java +++ b/src/edu/jhu/thrax/hadoop/features/mapred/MapReduceFeatureFactory.java @@ -12,6 +12,10 @@ public static MapReduceFeature get(String name) { return new SourcePhraseGivenTargetFeature(); else if (name.equals(TargetPhraseGivenSourceFeature.NAME)) return new TargetPhraseGivenSourceFeature(); + else if (name.equals(GoodTuringSmoothedTargetPhraseGivenSourceFeature.NAME)) + return new GoodTuringSmoothedTargetPhraseGivenSourceFeature(); + else if (name.equals(GoodTuringSmoothedSourcePhraseGivenTargetFeature.NAME)) + return new GoodTuringSmoothedSourcePhraseGivenTargetFeature(); else if (name.equals(SourcePhraseGivenLHSFeature.NAME)) return new SourcePhraseGivenLHSFeature(); else if (name.equals(LhsGivenSourcePhraseFeature.NAME)) diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetFeature.java 
b/src/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetFeature.java index 1808150..c9d01f7 100644 --- a/src/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetFeature.java +++ b/src/edu/jhu/thrax/hadoop/features/mapred/SourcePhraseGivenTargetFeature.java @@ -51,7 +51,7 @@ public Class reducerClass() { return Reduce.class; } - private static class Map extends Mapper { + protected static class Map extends Mapper { protected void setup(Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); @@ -107,7 +107,7 @@ protected void reduce(RuleWritable key, Iterable values, Context co } - public static class Comparator extends WritableComparator { + protected static class Comparator extends WritableComparator { private static final WritableComparator PARRAY_COMP = new PrimitiveArrayMarginalComparator(); private static final FieldComparator SOURCE_COMP = new FieldComparator(0, PARRAY_COMP); diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceFeature.java b/src/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceFeature.java index 54ece38..cdcc767 100644 --- a/src/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceFeature.java +++ b/src/edu/jhu/thrax/hadoop/features/mapred/TargetPhraseGivenSourceFeature.java @@ -51,7 +51,7 @@ public Class reducerClass() { return Reduce.class; } - private static class Map extends Mapper { + protected static class Map extends Mapper { protected void setup(Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); @@ -108,7 +108,7 @@ protected void reduce(RuleWritable key, Iterable values, Context co } - public static class Comparator extends WritableComparator { + protected static class Comparator extends WritableComparator { private static final WritableComparator PARRAY_COMP = new PrimitiveArrayMarginalComparator(); private static final FieldComparator SOURCE_COMP = new 
FieldComparator(0, PARRAY_COMP); diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/coc/CountOfCountsEstimator.java b/src/edu/jhu/thrax/hadoop/features/mapred/coc/CountOfCountsEstimator.java new file mode 100644 index 0000000..c60a6b5 --- /dev/null +++ b/src/edu/jhu/thrax/hadoop/features/mapred/coc/CountOfCountsEstimator.java @@ -0,0 +1,118 @@ +// Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. +package edu.jhu.thrax.hadoop.features.mapred.coc; + +import static java.lang.Math.*; + +import java.io.Serializable; +import java.util.Map; +import java.util.Map.Entry; + +/** + * Linear estimator of count of counts in log-log space. + * + * y = exp(slope * log(count) + intercept) + */ +public class CountOfCountsEstimator implements Serializable { + + private static final long serialVersionUID = -7988102132725579097L; + + private final double slope; + private final double intercept; + + public CountOfCountsEstimator(double slope, double intercept) { + this.slope = slope; + this.intercept = intercept; + } + + public double getSlope() { + return slope; + } + + public double getIntercept() { + return intercept; + } + + public long getRoundedCountOfCount(long count) { + return Math.round(getEstimatedCountOfCount(count)); + } + + public double getEstimatedCountOfCount(long count) { + return exp(slope * log(count) + intercept); + } + + public static CountOfCountsEstimator regress(Map countOfCountsMap) { + double[] counts = new double[countOfCountsMap.size()]; + double[] countOfCounts = new double[countOfCountsMap.size()]; + int idx = 0; + for (Entry e : countOfCountsMap.entrySet()) { + counts[idx] = e.getKey(); + countOfCounts[idx] = e.getValue(); + idx += 1; + } + return regress(counts, countOfCounts); + } + + /** + * Weighted least squares regression in log-log space of count of counts data. + * + * We can solve this by OLS with scaling our input data with sqrt(weight). + * As a weight we use the count of counts of each data point. 
+ * This is the more often a count appears the more weight it gets. + * + * We get: + * x1: sqrt(counts of counts) + * x2: sqrt(counts of counts) * log(counts) + * y: sqrt(counts of counts) * log(counts of counts) + * w: weighted by x (the counts) + * + * OLS solution is: + * (X^T X)^-1 X^Ty + */ + public static CountOfCountsEstimator regress(double[] counts, double[] countsOfCounts) { + if (!(counts.length == countsOfCounts.length)) { + throw new RuntimeException("Dimensions of counts and countsOfCounts must match."); + } + + final int numDataPoints = counts.length; + double[] x1 = new double[numDataPoints]; + double[] x2 = new double[numDataPoints]; + double[] y = new double[numDataPoints]; + for (int i = 0; i < numDataPoints; i++) { + double sqrt_of_weight = sqrt(countsOfCounts[i]); + x1[i] = sqrt_of_weight * 1.0; // bias (for intercept) + x2[i] = sqrt_of_weight * log(counts[i]); // feature + y[i] = sqrt_of_weight * log(countsOfCounts[i]); // target + } + + //X^T X + double xs00 = 0; + double xs01 = 0; //symmetric matrix: xs01 == xs10 + double xs11 = 0; + for (int j = 0; j < x1.length; j++) { + xs00 += x1[j] * x1[j]; + xs01 += x1[j] * x2[j]; + xs11 += x2[j] * x2[j]; + } + + // matrix inverse to get (X^T X)^-1 + double denom = xs00 * xs11 - xs01 * xs01; + double xs00_inv = xs11 / denom; + double xs01_inv = -xs01 / denom; + double xs11_inv = xs00 / denom; + + //X^T y + double xty0 = 0.; + double xty1 = 0.; + for (int j = 0; j < x1.length; j++) { + xty0 += x1[j] * y[j]; + xty1 += x2[j] *y [j]; + } + + //bringing everything together: [intercept slope]^T = (X^T X)^-1 X^T y + double intercept = xs00_inv * xty0 + xty1 * xs01_inv; + double slope = xs01_inv * xty0 + xty1 * xs11_inv; + + return new CountOfCountsEstimator(slope, intercept); + } + +} diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/coc/GoodTuringSmoother.java b/src/edu/jhu/thrax/hadoop/features/mapred/coc/GoodTuringSmoother.java new file mode 100644 index 0000000..4f0693a --- /dev/null +++ 
b/src/edu/jhu/thrax/hadoop/features/mapred/coc/GoodTuringSmoother.java @@ -0,0 +1,15 @@ +// Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. +package edu.jhu.thrax.hadoop.features.mapred.coc; + +public class GoodTuringSmoother { + private CountOfCountsEstimator estimator; + + public GoodTuringSmoother(CountOfCountsEstimator estimator) { + this.estimator = estimator; + } + + public double smoothedCount(int count) { + double turingFraction = estimator.getEstimatedCountOfCount(count + 1) / estimator.getEstimatedCountOfCount(count); + return (count + 1) * turingFraction; + } +} diff --git a/src/edu/jhu/thrax/util/HdfsUtils.java b/src/edu/jhu/thrax/util/HdfsUtils.java new file mode 100644 index 0000000..02f3c49 --- /dev/null +++ b/src/edu/jhu/thrax/util/HdfsUtils.java @@ -0,0 +1,49 @@ +// Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. +package edu.jhu.thrax.util; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +public class HdfsUtils { + + private HdfsUtils() {}; + + public static void writeObjectToFs(Configuration conf, E object, Path outPath) throws IOException { + FileSystem hdfs = FileSystem.get(conf); + + ObjectOutputStream oos = null; + try { + FSDataOutputStream out = hdfs.create(outPath); + oos = new ObjectOutputStream(out); + oos.writeObject(object); + } finally { + if (oos != null) { + oos.close(); + } + } + } + + public static E readObjectFromFs(Configuration conf, Path inPath) throws IOException,ClassNotFoundException { + FileSystem hdfs = FileSystem.get(conf); + + ObjectInputStream ois = null; + try { + FSDataInputStream in = hdfs.open(inPath); + ois = new ObjectInputStream(in); + @SuppressWarnings("unchecked") + E object = (E) 
ois.readObject(); + return object; + } finally { + if (ois != null) { + ois.close(); + } + } + } +} diff --git a/test/edu/jhu/thrax/hadoop/features/mapred/coc/CountOfCountsEstimatorTest.java b/test/edu/jhu/thrax/hadoop/features/mapred/coc/CountOfCountsEstimatorTest.java new file mode 100644 index 0000000..fdefb08 --- /dev/null +++ b/test/edu/jhu/thrax/hadoop/features/mapred/coc/CountOfCountsEstimatorTest.java @@ -0,0 +1,39 @@ +// Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. +package edu.jhu.thrax.hadoop.features.mapred.coc; + +import org.testng.Assert; +import org.testng.annotations.Test; + +public class CountOfCountsEstimatorTest { + + private static double[] doublesFromString(String text) { + String[] data = text.split(", "); + double[] doubles = new double[data.length]; + for (int i = 0; i < data.length; i++) { + doubles[i] = Double.valueOf(data[i]); + } + return doubles; + } + + @Test + public void testSampleDataFit() { + String countsExampleData = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 
223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 1212771, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 
622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 
1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, 1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, 1349, 1350, 
1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, 1405, 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1425, 1426, 1427, 1428, 1429, 1430, 1431, 1432, 1433, 1434, 1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519, 1520, 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533, 1534, 1535, 1536, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1553, 1554, 1555, 1556, 1557, 1558, 1559, 1560, 1561, 1562, 1563, 1564, 1565, 1566, 1567, 1568, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1595, 1596, 1597, 1598, 1599, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 1627, 1628, 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663, 1664, 1665, 1666, 1667, 1668, 1669, 1670, 1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679, 1680, 1681, 1682, 1683, 
1684, 1685, 1686, 1687, 1688, 1689, 1690, 1691, 1692, 1693, 1694, 1695, 1696, 1697, 1698, 1699, 1700, 1701, 1702, 1703, 1704, 1705, 1706, 1707, 1708, 1709, 1710, 1711, 1712, 1713, 1714, 1715, 1716, 229662, 1718, 1719, 1720, 1721, 1722, 1723, 1724, 1725, 1726, 1727, 1728, 1729, 1730, 1731, 1732, 1733, 1734, 1735, 1736, 1737, 1738, 1739, 1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, 1748, 1749, 1750, 1751, 1752, 1753, 1754, 1755, 1756, 1757, 1758, 1759, 1760, 1761, 1762, 1763, 1764, 1765, 1766, 1767, 1768, 1769, 1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779, 1780, 1781, 1782, 1783, 1784, 1785, 1786, 1787, 1788, 1789, 1790, 1791, 1792, 1793, 1794, 1795, 1796, 1797, 1798, 1799, 1800, 1801, 1802, 1803, 1804, 1805, 1806, 1807, 1808, 1809, 1810, 1811, 1812, 1813, 1814, 1815, 1816, 1817, 1818, 1819, 1820, 1821, 1822, 1823, 1824, 1825, 1826, 1827, 1828, 1829, 1830, 1831, 1832, 1833, 1834, 1835, 1836, 1837, 1838, 1839, 1840, 1841, 1842, 1843, 1844, 1845, 1846, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854, 1855, 1856, 1857, 1858, 1859, 1860, 1861, 1862, 1863, 1864, 1865, 1866, 1867, 1868, 1869, 1870, 1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879, 1880, 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 67484, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 67533, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 
2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031, 2032, 2033, 2034, 2035, 2036, 2037, 2038, 2039, 2040, 2041, 2042, 2043, 2044, 2045, 67582, 2047, 2048, 2049, 2050, 2051, 2052, 2053, 2054, 2055, 2056, 2057, 2058, 2059, 2060, 2061, 2062, 2063, 2064, 2065, 2066, 2067, 2068, 2069, 2070, 2071, 2072, 2073, 2074, 2075, 231452, 2077, 2078, 2079, 2080, 2081, 2082, 2083, 2084, 2085, 2086, 2087, 2088, 2089, 2090, 2091, 2092, 2093, 2094, 2096, 2097, 2098, 2099, 2100, 2101, 2102, 2103, 2104, 2105, 2106, 2107, 2108, 2109, 2110, 2111, 2112, 2113, 2114, 2115, 2116, 2117, 2118, 2119, 2120, 2121, 2122, 2123, 2124, 2125, 2126, 2127, 2128, 2130, 355, 2132, 2133, 2134, 2135, 2136, 2138, 2139, 2140, 2141, 2142, 2143, 2144, 2145, 2146, 2147, 2148, 2149, 2150, 2151, 2152, 2153, 2154, 2155, 2156, 2157, 2158, 2159, 2160, 2161, 2162, 2163, 2164, 2165, 2166, 2167, 2168, 2169, 2170, 2171, 2172, 2173, 2174, 2175, 2176, 2177, 2178, 2179, 2180, 2181, 2182, 2183, 2184, 2185, 2186, 2187, 2188, 2189, 2190, 2191, 2192, 2193, 2194, 2195, 2196, 2198, 2199, 2200, 2201, 2202, 2203, 2204, 2205, 2206, 2207, 2208, 2209, 2210, 2211, 2212, 2213, 2214, 2215, 2216, 2217, 2218, 2219, 2220, 2221, 2222, 2223, 2224, 2225, 2226, 2227, 2228, 2229, 2231, 2232, 2233, 2234, 2235, 2236, 2237, 2238, 2239, 2240, 2241, 2242, 2243, 2244, 2245, 2246, 2247, 2248, 2249, 2250, 2251, 2252, 2253, 2254, 2255, 2257, 2259, 2260, 2261, 2262, 2263, 2264, 2265, 2266, 2267, 2268, 2269, 100574, 2271, 2272, 2273, 2274, 2275, 2276, 2277, 2278, 2279, 2280, 2281, 2282, 2283, 2284, 2285, 2286, 2287, 2288, 2289, 2291, 2292, 2293, 2294, 2295, 2296, 2297, 2298, 2299, 2300, 2301, 2302, 2303, 2304, 2305, 2306, 2307, 2308, 2309, 2310, 2311, 2312, 2313, 2314, 2315, 2316, 2317, 2318, 2319, 2320, 2321, 2322, 35091, 2324, 2325, 2326, 2327, 2328, 2329, 2330, 2331, 2332, 2333, 2334, 2335, 2336, 2337, 2338, 2339, 2340, 2341, 2342, 2343, 2344, 2345, 2346, 2347, 2348, 2350, 2351, 2352, 2353, 2354, 2355, 2356, 
2357, 2358, 2359, 2360, 2361, 2362, 2363, 2364, 2365, 2366, 2367, 2368, 2369, 2370, 2371, 2372, 2373, 2374, 2375, 2376, 2377, 2378, 2379, 2380, 2381, 2382, 2383, 2384, 2385, 2386, 2387, 2388, 2389, 2390, 2391, 2392, 2393, 2394, 2395, 2396, 2397, 2398, 2399, 2400, 2401, 2402, 2403, 2404, 2405, 2406, 2407, 2408, 2409, 2410, 2411, 2412, 2413, 2414, 2415, 2416, 2417, 2418, 2419, 2420, 2421, 2422, 2423, 2424, 2425, 2426, 2427, 2428, 2429, 2430, 2431, 2432, 2433, 2434, 2435, 2436, 2437, 2438, 2439, 2440, 2441, 2442, 2443, 2444, 2445, 2446, 2447, 2448, 2450, 2451, 2452, 2453, 2454, 2455, 35224, 2457, 2458, 2459, 2460, 2461, 2462, 2463, 2464, 2465, 2466, 2467, 2468, 2469, 2470, 2471, 2472, 2474, 2475, 2476, 2477, 2478, 2479, 2480, 2481, 2483, 2484, 2485, 2486, 2487, 2488, 2489, 2490, 2491, 2492, 2493, 2494, 2495, 2496, 2497, 2498, 2499, 2500, 2501, 2502, 2503, 2505, 2506, 2507, 2508, 2509, 2510, 2511, 2512, 2513, 2514, 2515, 2516, 2517, 2518, 2519, 2520, 2521, 2522, 2523, 2524, 2526, 2527, 2528, 2529, 2530, 2531, 2532, 2533, 2534, 2535, 2536, 2537, 2538, 2539, 2540, 2541, 2542, 2543, 2544, 2545, 2546, 2548, 2549, 2550, 2551, 2552, 2553, 2554, 2555, 2556, 2557, 2558, 2559, 2561, 2562, 2563, 2564, 2565, 2566, 2567, 2568, 2569, 2570, 2571, 2572, 2574, 2575, 2576, 2577, 2578, 2579, 2580, 2581, 2582, 2583, 2584, 2585, 2586, 2587, 2588, 2589, 2590, 2591, 2592, 2593, 2594, 2595, 2596, 2597, 2598, 2599, 2601, 2602, 2603, 100908, 2605, 2606, 2607, 2608, 2609, 2610, 2611, 2612, 2613, 2614, 2615, 2616, 2617, 2618, 2619, 2620, 2621, 2622, 2623, 2624, 2625, 2626, 2627, 2629, 2630, 2631, 2633, 2634, 33207, 2636, 2637, 2638, 2639, 2640, 2641, 2642, 2643, 2644, 2645, 2646, 2647, 2648, 2649, 2650, 2651, 2652, 2653, 2654, 2655, 2656, 2657, 2658, 2660, 2662, 2663, 2664, 2665, 2666, 2667, 2668, 2669, 2670, 2671, 2672, 2673, 2674, 2675, 2676, 2677, 2678, 2680, 2681, 2682, 2683, 2684, 2685, 2686, 2687, 2688, 2689, 2691, 2692, 2693, 2694, 2695, 2696, 2697, 2698, 2699, 2700, 33218, 2702, 2703, 
2704, 2705, 2706, 2707, 35476, 2709, 2710, 2711, 2712, 2714, 2715, 2716, 2717, 2718, 2719, 2720, 2721, 2722, 2723, 2724, 2725, 2726, 2728, 2730, 2731, 2732, 2733, 2734, 2736, 2737, 2738, 2739, 2740, 2741, 2742, 2743, 2744, 2745, 2746, 2747, 2748, 2749, 2750, 2751, 2752, 35522, 2755, 2756, 2757, 2758, 2759, 2760, 2761, 2762, 2764, 2765, 2766, 2767, 2768, 2769, 2770, 2771, 2772, 2773, 2774, 2775, 2776, 2777, 2779, 2780, 2781, 2782, 2783, 2784, 2785, 2786, 2787, 2788, 2789, 2790, 2791, 2792, 2793, 2794, 2796, 2797, 2798, 2799, 2800, 2801, 2802, 2803, 2804, 2805, 2806, 2807, 2808, 2809, 2810, 2811, 2812, 2813, 2814, 2816, 2817, 2818, 2819, 2820, 2822, 2823, 2824, 2825, 2826, 2827, 2828, 2829, 2830, 2831, 2832, 2834, 2835, 2836, 2837, 2838, 2840, 2841, 2842, 2844, 2845, 2846, 2847, 2848, 2849, 2850, 2851, 2853, 2854, 2855, 2856, 2857, 2858, 2859, 2860, 2861, 2862, 2863, 2864, 2867, 2868, 2869, 2870, 2871, 2872, 2873, 2874, 2875, 2876, 2877, 2878, 2879, 2880, 2881, 2883, 2884, 2885, 2887, 2888, 2889, 2890, 2891, 2892, 2894, 2895, 2896, 2897, 2898, 2899, 2900, 2901, 2902, 2903, 2904, 2905, 2906, 2907, 2908, 2910, 2911, 2912, 2913, 2914, 2915, 2916, 2917, 2918, 2919, 2920, 2921, 2923, 2924, 2925, 2926, 2927, 2928, 2929, 2930, 2931, 2932, 2933, 2934, 2935, 2936, 2937, 2938, 2940, 2941, 2942, 2943, 2944, 2945, 2946, 2947, 2948, 2949, 2950, 2951, 2952, 2953, 2954, 2955, 2956, 2957, 2958, 2960, 2961, 2962, 2963, 2964, 2965, 2966, 2967, 2968, 2969, 2970, 2972, 2973, 2974, 2975, 2976, 2977, 2978, 2979, 2981, 2982, 2983, 2984, 2985, 2987, 2988, 2991, 2992, 2993, 2994, 2996, 2997, 2998, 2999, 3000, 3001, 3002, 3003, 3004, 3005, 3007, 3008, 3009, 3010, 3012, 3013, 3014, 3015, 3016, 101321, 3018, 3019, 3020, 3021, 3022, 3023, 3024, 3025, 3027, 3028, 3030, 3031, 3032, 3033, 3034, 3035, 3036, 3037, 3038, 3039, 3040, 3041, 3043, 3044, 3045, 3046, 3048, 3049, 3050, 3051, 3052, 3054, 3055, 3056, 3057, 3058, 3060, 3061, 3062, 3063, 3064, 3065, 3066, 3067, 3068, 3069, 3070, 3071, 3072, 
3073, 3074, 3075, 3076, 3077, 3079, 3080, 3081, 3082, 3084, 3085, 3086, 3087, 3088, 3089, 3090, 3091, 3092, 3093, 3094, 3095, 3096, 3097, 3098, 3099, 3100, 3101, 3102, 3104, 3108, 3109, 3110, 3111, 3112, 3113, 3115, 3116, 3117, 3119, 3120, 3121, 3122, 3123, 3124, 3125, 3126, 35896, 3129, 3130, 3131, 3132, 3133, 3135, 3136, 3137, 3139, 3141, 3142, 3143, 3144, 3145, 3146, 3147, 3148, 3149, 3150, 3151, 3152, 3153, 3154, 3155, 3156, 3158, 3159, 3160, 32839, 3163, 3164, 3165, 3166, 3167, 3168, 3171, 3172, 3173, 3174, 3175, 3176, 3177, 3178, 3179, 3180, 3181, 3182, 3183, 3184, 3185, 3186, 3187, 3189, 3194, 3195, 3196, 3198, 3199, 3200, 3202, 3203, 3205, 3206, 3207, 3208, 3209, 3210, 3211, 3212, 3214, 3215, 3216, 3217, 3218, 3219, 3220, 3221, 3222, 3223, 3224, 3225, 3226, 3227, 3228, 3230, 3231, 3233, 3234, 3235, 3237, 3238, 3239, 3240, 3241, 3242, 3243, 3244, 3245, 3246, 3247, 3248, 3250, 3251, 3252, 3253, 3254, 3255, 3256, 3258, 3259, 3260, 3261, 3262, 3264, 3265, 3267, 3268, 3269, 3270, 3271, 3272, 3273, 3274, 3275, 3276, 3277, 3278, 3279, 3280, 3281, 3282, 3283, 3284, 3285, 3287, 3288, 3290, 3291, 3292, 3293, 3294, 3296, 3297, 3298, 3299, 3300, 3302, 3303, 3304, 3305, 3306, 3307, 3308, 3310, 3312, 3313, 3314, 3315, 3316, 3317, 3318, 3319, 3320, 3321, 3323, 3324, 3325, 3326, 3327, 3328, 3329, 3330, 3332, 3333, 3334, 3335, 3337, 3338, 3339, 3341, 3344, 3345, 3346, 3347, 3348, 3349, 3350, 3351, 3352, 3354, 3355, 3356, 3357, 36127, 3360, 3361, 3362, 3363, 3364, 3365, 3366, 3367, 3368, 3369, 3370, 3371, 3372, 3373, 3374, 3375, 3376, 3377, 3378, 3379, 3380, 3381, 3383, 3384, 3385, 3386, 3387, 3389, 3390, 3392, 3393, 3394, 3395, 3396, 3397, 3398, 3399, 3400, 3402, 3405, 3406, 3407, 3408, 3409, 3410, 3411, 3412, 3414, 3415, 3416, 3417, 3418, 3419, 3421, 3422, 3423, 3427, 3428, 3429, 3431, 3432, 3435, 3436, 3439, 3440, 3441, 3442, 3443, 3444, 3445, 3446, 3447, 3449, 3450, 3452, 3453, 3456, 3457, 3459, 3461, 3462, 3463, 3464, 3465, 3466, 3467, 3468, 3469, 3470, 3471, 3472, 
3473, 3474, 3476, 3478, 3479, 3480, 3481, 3482, 3483, 3484, 3485, 3486, 3487, 3488, 3489, 3490, 3491, 3492, 33350, 3494, 3495, 3496, 3497, 3498, 3499, 3500, 3501, 3502, 3503, 3504, 3505, 3506, 3507, 3508, 3509, 3510, 3511, 3512, 3513, 3515, 3517, 3518, 3519, 3520, 3521, 3522, 3523, 3524, 3525, 36294, 3527, 36296, 3529, 3530, 3532, 3533, 3537, 3538, 3539, 3540, 3541, 3542, 3543, 3545, 3546, 3547, 3548, 3549, 3550, 3552, 3554, 3556, 3557, 3558, 3559, 3560, 3561, 3562, 3563, 3565, 3566, 3567, 3568, 3569, 3570, 3571, 3572, 3573, 3574, 3575, 3577, 3578, 3579, 3581, 3582, 3583, 3585, 3586, 3587, 3588, 3589, 3591, 3592, 3594, 3595, 3596, 3599, 3600, 3601, 3602, 3606, 3610, 3611, 3612, 3613, 65588, 3616, 3619, 3620, 3621, 3622, 3623, 3624, 3626, 3627, 3628, 3629, 3630, 3633, 3634, 3636, 3637, 3638, 3639, 3640, 3641, 3642, 3643, 3646, 3648, 3649, 3650, 3651, 3652, 3654, 3655, 3656, 3657, 3658, 3660, 3661, 3662, 3663, 3664, 3665, 3666, 3667, 3668, 3669, 3671, 3673, 3674, 3676, 3677, 3678, 3680, 3681, 3682, 3683, 3684, 3685, 3686, 36455, 3688, 3689, 3692, 3693, 3694, 3696, 3697, 3698, 3700, 3701, 3703, 3706, 3708, 3711, 3713, 3714, 3716, 3717, 3718, 3719, 3720, 3721, 3723, 3724, 3725, 3726, 3728, 3729, 3730, 3731, 3732, 3733, 3734, 3736, 3737, 3738, 3739, 3741, 3742, 3743, 3744, 3745, 3746, 3749, 3751, 3754, 3756, 3757, 3758, 3759, 3760, 3761, 3762, 3763, 3764, 3765, 3766, 3767, 3768, 3770, 3772, 3773, 3774, 3775, 3776, 3777, 3778, 3779, 3780, 3782, 3783, 3784, 3786, 3787, 3788, 3789, 3791, 3792, 3794, 3795, 3796, 3799, 3800, 3801, 3802, 3803, 3804, 36573, 3806, 3808, 3809, 3810, 3813, 3815, 3816, 3817, 3818, 3819, 3820, 3821, 3822, 3824, 3825, 3826, 3827, 3828, 3829, 3833, 3834, 3836, 3838, 3839, 3840, 3842, 3843, 3844, 3845, 3846, 3847, 3849, 3850, 3851, 3852, 3853, 3854, 3855, 3856, 3858, 3859, 3860, 3861, 36632, 3865, 3866, 3868, 3869, 3870, 3871, 3872, 3873, 3875, 3876, 3877, 3878, 3879, 3880, 38876, 3882, 3883, 3885, 3886, 3888, 3889, 3890, 3891, 3892, 3894, 3895, 3896, 
3899, 3900, 3901, 3903, 3904, 3905, 3907, 3908, 3909, 3910, 3911, 3912, 3913, 3914, 3915, 3916, 3918, 3920, 3921, 3922, 3923, 3924, 3925, 3926, 3927, 3929, 3930, 3931, 3932, 3933, 3934, 3935, 3936, 3937, 3938, 3939, 3941, 3942, 3943, 3944, 3945, 3946, 3947, 3948, 3950, 3951, 3952, 3953, 3955, 3957, 3958, 3959, 3960, 3964, 3965, 3967, 3968, 3969, 3971, 3972, 3974, 3975, 3978, 3983, 3984, 3985, 3989, 3991, 3994, 3995, 3998, 3999, 4000, 4001, 4003, 4004, 4005, 4006, 4010, 4011, 4012, 4014, 4015, 4016, 4017, 4018, 4019, 4020, 4024, 4027, 4028, 4030, 4031, 4033, 4036, 4037, 4039, 4040, 4041, 4042, 4043, 4044, 4046, 4049, 4050, 4052, 4053, 4055, 4056, 4057, 4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065, 4069, 4070, 4071, 4072, 4074, 4075, 4076, 4077, 4078, 4080, 4081, 4085, 4086, 4087, 4089, 4090, 4091, 4092, 4095, 4098, 4101, 4102, 4104, 4106, 4108, 4109, 4110, 4112, 4113, 4114, 4115, 4117, 4120, 4121, 4124, 4126, 4129, 4131, 4132, 4134, 4135, 4138, 4140, 4141, 4142, 4144, 4145, 4147, 4148, 4149, 4150, 4151, 4154, 4155, 4156, 4157, 4159, 4160, 4161, 4162, 4165, 4167, 4170, 4172, 4173, 4174, 4175, 4176, 4177, 4179, 4180, 4182, 4184, 4188, 4189, 4190, 4193, 4194, 331875, 4196, 4198, 4199, 4200, 4201, 4203, 4204, 4205, 4206, 4207, 4208, 4209, 4210, 4211, 4212, 4213, 4219, 4223, 4224, 4226, 4227, 4230, 4231, 4232, 4233, 4234, 4235, 4238, 4239, 4242, 4245, 4246, 4247, 4248, 4250, 4251, 4252, 4254, 4256, 4257, 4258, 4261, 4264, 69804, 4269, 4272, 4273, 4275, 4280, 4282, 4284, 4285, 4286, 4287, 4288, 4289, 4290, 4291, 4292, 4295, 4296, 4297, 4298, 4300, 4301, 4302, 4304, 4305, 4308, 4310, 4312, 4315, 4321, 4322, 4323, 4325, 4326, 4328, 4330, 4332, 4333, 4334, 4339, 4342, 4343, 4345, 4347, 4350, 4351, 4353, 4354, 4355, 4356, 4357, 4358, 4359, 4361, 4362, 4364, 4366, 4367, 4368, 4369, 4370, 4372, 4373, 4379, 4380, 4384, 4386, 4387, 4388, 4390, 4394, 69931, 4398, 4399, 4400, 4404, 4405, 4406, 4410, 4411, 4413, 4414, 4415, 4417, 4420, 4422, 4425, 4426, 4429, 4431, 4432, 4433, 
4434, 4438, 4440, 4441, 4442, 4443, 4444, 4445, 4446, 4447, 4449, 4451, 4452, 4453, 4454, 4458, 4460, 4461, 4462, 4463, 4467, 4468, 4469, 4470, 4471, 4474, 4480, 4483, 4484, 4485, 4488, 4489, 4490, 4491, 4492, 4493, 4494, 4495, 4496, 70034, 4499, 4500, 4501, 4502, 4504, 4506, 4507, 4508, 4510, 4512, 4513, 4516, 4517, 4519, 4521, 4522, 4523, 4524, 4527, 4528, 4530, 4531, 4535, 4536, 4537, 4538, 4540, 4542, 4543, 4544, 4545, 4546, 4548, 4549, 4550, 4552, 4553, 4554, 4555, 37324, 4557, 4558, 4559, 4561, 4564, 4565, 4569, 4572, 4573, 4574, 4575, 4576, 4577, 4578, 4580, 4582, 4583, 4584, 4585, 4586, 4589, 4590, 4592, 4596, 4597, 4600, 4601, 4602, 4605, 4606, 4607, 4608, 4609, 4616, 4621, 4623, 4624, 4627, 4630, 4631, 4635, 4636, 4640, 4641, 4642, 4643, 4644, 4647, 4648, 4651, 4652, 4653, 4654, 4655, 4659, 4662, 4663, 4665, 4669, 4670, 4672, 4675, 4676, 4677, 4678, 4679, 4681, 4683, 4685, 4686, 4687, 4688, 4689, 4690, 4691, 4692, 4696, 4700, 4701, 4705, 4706, 4707, 4708, 4709, 4710, 33553, 4712, 4713, 4714, 4715, 4716, 4718, 4719, 4720, 4721, 4722, 4723, 4724, 4726, 4731, 4733, 4734, 4735, 4736, 4738, 4743, 4744, 4745, 4746, 4748, 4749, 4750, 4751, 4752, 4753, 4757, 4758, 4760, 4761, 4762, 4763, 4764, 4765, 4766, 4767, 4769, 4770, 4771, 4773, 4775, 4776, 4784, 4787, 4788, 4790, 4794, 4797, 4800, 4801, 4803, 4804, 4805, 4806, 4807, 4810, 4813, 4814, 4815, 4816, 4817, 4821, 4823, 4824, 4825, 4830, 66341, 4832, 4833, 4835, 4839, 4840, 4841, 4845, 4846, 4847, 4848, 4855, 4856, 4857, 4862, 4863, 4865, 4866, 4867, 4870, 4871, 4874, 234251, 4880, 4881, 4882, 4883, 4884, 4887, 4888, 37657, 4890, 4891, 4892, 4894, 4896, 4897, 4898, 4899, 4902, 4905, 4906, 4910, 4911, 4912, 4914, 4915, 4917, 4918, 4919, 4920, 4924, 4925, 234303, 4930, 4932, 4933, 4934, 4935, 4936, 4937, 4938, 4939, 4940, 4941, 4945, 4946, 4948, 4949, 4952, 4955, 4957, 4959, 4960, 4961, 4965, 4967, 4972, 4973, 4975, 4976, 4977, 4979, 4981, 4982, 37751, 4985, 4986, 4994, 4998, 5000, 5002, 5004, 5011, 5013, 5014, 
5017, 5018, 5021, 5029, 5031, 5035, 5036, 5038, 5042, 5043, 5044, 5051, 5052, 5056, 5057, 5059, 5060, 5063, 5064, 5065, 5066, 5070, 5072, 5074, 5076, 5078, 5080, 5081, 5082, 5083, 5087, 5088, 5089, 5091, 5095, 5096, 5099, 5102, 5105, 5108, 5110, 5111, 70648, 5114, 5115, 5116, 5118, 5119, 5120, 5121, 5123, 5124, 5126, 5129, 5134, 5136, 5137, 5138, 5139, 5142, 5143, 37912, 5148, 5149, 5150, 5158, 5160, 5161, 5162, 5163, 5164, 5166, 5167, 5168, 5169, 5170, 37940, 5173, 5176, 5179, 5182, 5183, 5185, 5186, 5190, 5191, 5192, 37963, 5196, 5197, 5199, 5200, 5201, 5202, 5207, 5211, 5212, 5215, 5221, 5223, 5224, 5225, 5226, 5228, 5229, 5230, 5231, 38000, 5233, 5234, 5235, 5236, 5237, 5238, 5239, 5240, 5241, 5243, 5245, 5249, 5251, 5253, 5254, 5256, 5258, 5265, 5267, 5272, 5274, 5275, 5276, 5277, 5280, 5281, 5282, 5284, 5286, 5288, 5292, 5293, 5294, 5296, 5299, 5300, 5304, 5306, 5308, 5312, 5315, 5316, 5317, 5320, 5321, 5322, 38096, 5331, 103638, 5335, 5336, 38107, 5341, 5342, 5345, 5348, 5349, 5350, 5351, 5355, 5357, 5359, 5360, 5361, 5362, 5363, 5364, 5365, 5366, 5367, 5370, 5371, 5374, 5375, 5377, 5378, 5379, 5386, 5387, 5389, 5390, 70930, 5395, 5396, 5397, 5398, 5401, 5403, 5405, 5407, 5409, 5410, 5413, 5416, 5418, 5420, 5423, 5424, 5427, 5434, 5436, 5438, 5441, 5442, 32988, 5448, 5451, 5452, 5454, 5455, 5458, 5460, 5462, 5464, 5465, 164751, 5470, 5471, 5472, 5479, 5480, 5482, 5486, 5487, 5491, 5494, 5499, 5500, 5503, 5504, 5505, 5506, 5507, 5509, 5511, 5512, 5514, 5518, 5519, 5521, 5522, 5523, 5525, 5528, 5531, 5532, 5534, 5536, 5537, 71074, 5539, 5541, 5542, 5543, 5546, 5552, 5553, 5554, 5556, 5559, 5561, 5562, 5564, 5568, 5569, 5570, 5571, 5576, 5578, 5580, 5582, 5583, 5584, 5587, 5592, 5595, 5597, 5598, 5599, 5604, 38378, 5611, 5612, 5613, 5614, 5617, 5618, 5622, 5623, 5625, 5626, 5627, 5628, 5629, 5630, 5633, 5634, 5635, 5639, 5641, 5643, 5648, 5649, 5651, 5652, 5653, 5656, 5660, 5661, 5664, 5665, 5666, 5667, 5668, 5669, 5670, 5671, 5672, 5674, 5676, 5680, 66748, 
5686, 38455, 5690, 5693, 5694, 5697, 5705, 5706, 5707, 5708, 5710, 5712, 5713, 5714, 5715, 5716, 5717, 5718, 5721, 5725, 5727, 5732, 5733, 5735, 5736, 5739, 5740, 5741, 5743, 5746, 5747, 5751, 5753, 5754, 5756, 5757, 5758, 5762, 5765, 5767, 5770, 5776, 5779, 5780, 5784, 5785, 5788, 38558, 5791, 5793, 5794, 5795, 5798, 5802, 5803, 5807, 5808, 5810, 5813, 5815, 5817, 5818, 5820, 5822, 5825, 5826, 5827, 5828, 5835, 5838, 5842, 5848, 5849, 5850, 5852, 5855, 5857, 5858, 5859, 5863, 5865, 5867, 5871, 5873, 5875, 71119, 5880, 5881, 5883, 5886, 5894, 5896, 5897, 5898, 5899, 5900, 5902, 5903, 5905, 5907, 5908, 5909, 5911, 5914, 5915, 5917, 5918, 5920, 5934, 5936, 5942, 5943, 694072, 5950, 5951, 5953, 5954, 5961, 5965, 5968, 5969, 5971, 5973, 5975, 5980, 5988, 5989, 5993, 5995, 5997, 5998, 104303, 6000, 6001, 6002, 6005, 6007, 6013, 6015, 6019, 6022, 6024, 6027, 6028, 6033, 6034, 6035, 6036, 6042, 6044, 6048, 6052, 6056, 6057, 202666, 6060, 6062, 6064, 6065, 6067, 6068, 6069, 6071, 6072, 33780, 6074, 6076, 6078, 6081, 6083, 6084, 6085, 6087, 6094, 6095, 6096, 6098, 6101, 6108, 6112, 6113, 6123, 6124, 6128, 6129, 6131, 6133, 6134, 6137, 6139, 6141, 6142, 6143, 6145, 6147, 6149, 6152, 6154, 6155, 6156, 6164, 6165, 38935, 6170, 6174, 6178, 6180, 6181, 6182, 6185, 6188, 6189, 6190, 6200, 6202, 6204, 6206, 6207, 6209, 6210, 6212, 6214, 6218, 6219, 6220, 6221, 6224, 6225, 6227, 6228, 6232, 6233, 6237, 6240, 6243, 6244, 6248, 6251, 6253, 6257, 6259, 6260, 6261, 6265, 6268, 6269, 6270, 6273, 6277, 6279, 6282, 6287, 6294, 6297, 6298, 6300, 6302, 6303, 6307, 6316, 6321, 6324, 6325, 6326, 6329, 6338, 6344, 6349, 6351, 6356, 6360, 6361, 11983, 6365, 104677, 6379, 6380, 6391, 6394, 6396, 6397, 6399, 137473, 6403, 39172, 6407, 6409, 6412, 6420, 6428, 6429, 6431, 6433, 6438, 6440, 6445, 6446, 6450, 6459, 6469, 6474, 6475, 6478, 6480, 39250, 6485, 6487, 6492, 6494, 6495, 6498, 6500, 6502, 6503, 6504, 39276, 6510, 6519, 6521, 6525, 6526, 6527, 6531, 6532, 6536, 6541, 6545, 6547, 6548, 6549, 
6550, 6551, 6562, 6565, 6567, 6568, 6574, 6578, 6584, 77556, 6586, 6588, 6589, 6594, 6595, 6596, 6602, 6606, 203215, 6608, 6610, 6617, 6621, 6622, 6628, 6630, 6632, 6636, 6643, 6647, 6648, 39423, 6658, 6660, 6662, 39432, 6667, 6669, 6672, 6674, 6676, 6681, 6682, 6683, 6686, 6688, 6689, 6690, 6691, 6696, 6706, 6708, 6710, 6714, 6721, 6723, 6726, 6727, 6730, 6732, 6734, 6735, 6743, 6749, 6751, 6756, 6759, 6764, 6770, 6772, 6773, 6774, 6780, 6788, 6791, 6792, 6795, 6796, 6804, 6807, 6810, 6814, 6815, 6817, 6819, 6822, 6826, 6828, 6829, 5683, 6835, 105141, 6841, 6844, 6846, 6847, 6850, 6858, 6859, 6860, 6861, 6865, 6869, 6872, 6607, 6876, 6878, 6880, 6883, 6885, 6888, 6891, 6898, 6904, 6905, 6906, 6907, 6914, 6920, 55767, 6928, 6930, 33923, 6938, 6939, 6944, 33926, 6952, 6957, 6958, 105267, 6970, 6972, 6974, 6977, 6987, 72524, 6991, 6994, 6997, 7000, 7012, 7013, 7016, 7017, 7018, 7021, 7025, 7026, 7034, 7035, 7036, 7037, 7039, 7040, 7045, 7047, 7049, 7052, 7056, 7057, 7060, 7063, 7066, 7067, 7069, 7077, 7084, 7091, 7093, 7095, 7097, 7098, 7100, 7101, 7102, 7106, 7108, 7110, 7113, 7119, 7122, 7124, 7125, 7131, 7133, 7134, 7136, 7138, 7139, 7140, 7143, 7147, 7148, 7151, 7152, 33960, 7154, 7160, 7162, 7164, 7166, 7169, 7173, 7174, 7179, 7180, 7181, 7182, 7183, 7188, 7189, 7191, 7192, 7193, 7199, 7202, 7208, 7209, 7211, 7214, 7216, 7222, 7230, 7233, 7242, 7245, 7247, 105559, 7259, 7262, 7263, 7264, 7268, 7269, 7270, 7272, 7273, 7274, 7277, 7281, 7288, 7289, 7295, 7299, 7302, 7304, 7317, 7318, 7319, 7326, 7346, 7349, 7352, 7354, 7358, 7359, 7362, 7365, 7366, 7367, 7368, 7369, 7373, 7376, 7377, 7379, 7382, 7392, 7393, 7394, 7398, 7399, 7400, 7411, 7413, 7414, 7419, 7423, 7426, 7427, 7429, 7432, 7433, 7434, 7435, 7437, 7438, 7439, 7443, 7446, 7451, 7452, 7456, 7457, 7458, 7459, 7460, 7462, 7464, 7465, 7473, 7474, 7476, 7482, 7496, 7497, 7500, 7506, 7513, 7515, 7517, 7527, 7543, 7551, 7552, 7556, 7557, 7560, 7562, 7563, 7566, 7572, 7574, 7575, 7597, 7598, 7605, 7608, 7614, 
7619, 7623, 7625, 7627, 7632, 7636, 7642, 7653, 7657, 7666, 7667, 40436, 7670, 7680, 7681, 7682, 7686, 7687, 263426, 7702, 7710, 7712, 7715, 7716, 7717, 7719, 7720, 7726, 7730, 7733, 597562, 7744, 7748, 7763, 7776, 7777, 7779, 40550, 7787, 7789, 40560, 7795, 7806, 7809, 7811, 7814, 7816, 7822, 7825, 7830, 7838, 7845, 7848, 106160, 7859, 7865, 7871, 7873, 7874, 7876, 7879, 7891, 7899, 7901, 34085, 7904, 7908, 7912, 7924, 7925, 7926, 7928, 7929, 7932, 7934, 7935, 7940, 7946, 7949, 7950, 34093, 7953, 7955, 7969, 7973, 7975, 7981, 7985, 7989, 7992, 7994, 7995, 7998, 8003, 110561, 8015, 8019, 8020, 8022, 8030, 8035, 8036, 8044, 8046, 8051, 8052, 8057, 132415, 8061, 8068, 8071, 8072, 8074, 8080, 8082, 8086, 8102, 8109, 8113, 8114, 8116, 8133, 8136, 8146, 8152, 8154, 8164, 8171, 8174, 8175, 8187, 8191, 8200, 8202, 8205, 8208, 8210, 8211, 8219, 8221, 8224, 8226, 8250, 8251, 8254, 8259, 8261, 8269, 8279, 8288, 8293, 8299, 8303, 8305, 8311, 8312, 8320, 8322, 8325, 8330, 8332, 8334, 8341, 8344, 8347, 172188, 106654, 8351, 8356, 8357, 8360, 8361, 41131, 8364, 8365, 8370, 8377, 8383, 8388, 8397, 8400, 8401, 8405, 8407, 8412, 8413, 8414, 8415, 8418, 8424, 34172, 8435, 8444, 8445, 8449, 8456, 8460, 8465, 8466, 8469, 8471, 8476, 8479, 8487, 8488, 8507, 8513, 8514, 8522, 8528, 8530, 8534, 8535, 8537, 198031, 8540, 8545, 8547, 8550, 8557, 8558, 8559, 8563, 8566, 8568, 8573, 8578, 8580, 8583, 8585, 41354, 8595, 8596, 8598, 8604, 8607, 8625, 8628, 8633, 8637, 8646, 8653, 8654, 8655, 8661, 8664, 41433, 8667, 8670, 8674, 8675, 8681, 33023, 8693, 8694, 8707, 8712, 8714, 8715, 8716, 8727, 8730, 8731, 8735, 8739, 8745, 8746, 8749, 8757, 8758, 8759, 8773, 8775, 8779, 8780, 8782, 41551, 8793, 8795, 8796, 8799, 8804, 8812, 8813, 6931, 8820, 8822, 8826, 8829, 8833, 8839, 8850, 8851, 8852, 8864, 8878, 41655, 8889, 8895, 8897, 8900, 8910, 8915, 8929, 41701, 41702, 8935, 8937, 8940, 8946, 8949, 8951, 8954, 8957, 8959, 8962, 8971, 8974, 8979, 8987, 500511, 8993, 8996, 8999, 9001, 6963, 9013, 9020, 
9026, 9028, 9029, 9035, 9036, 9042, 34275, 9044, 172887, 9050, 9060, 9065, 9067, 9069, 9075, 9081, 9087, 9088, 9104, 9113, 9117, 9131, 9132, 9134, 9146, 9147, 9149, 9154, 9162, 9167, 9168, 9175, 9195, 9200, 9203, 9209, 9212, 41984, 9227, 9230, 9233, 9237, 9248, 9249, 9259, 9261, 9262, 9290, 9291, 9292, 9294, 9308, 9314, 9320, 9329, 9331, 9339, 9340, 9342, 9346, 9364, 9365, 9369, 9386, 9395, 173243, 9417, 9423, 9428, 9436, 9439, 9442, 9443, 9446, 9477, 9478, 9480, 9481, 9487, 9491, 9501, 9504, 9522, 42307, 9540, 9546, 42326, 34361, 9560, 9567, 9568, 9573, 9582, 9587, 1147097, 9597, 9604, 9610, 9611, 9618, 9619, 9620, 9625, 9627, 9634, 9636, 9639, 9641, 9648, 9653, 9655, 107963, 9679, 9689, 9703, 9710, 9712, 9714, 9715, 9728, 9733, 9735, 9737, 9740, 9744, 9748, 42520, 9754, 75295, 9768, 9773, 108078, 9777, 9802, 9816, 9821, 9826, 75365, 9830, 9844, 9845, 9851, 9857, 9859, 9868, 9879, 9881, 9884, 9886, 9889, 9891, 42664, 9898, 9904, 9910, 9927, 9933, 9936, 9960, 9964, 9966, 9967, 9973, 9974, 9993, 75535, 10006, 10008, 10012, 10014, 10018, 10020, 10023, 10048, 10061, 10064, 10067, 10068, 10072, 10080, 10082, 10085, 10088, 10098, 10099, 10101, 10102, 10108, 10124, 10126, 10131, 10137, 10141, 10147, 10151, 10160, 10164, 10189, 141264, 10193, 10195, 10203, 206816, 10212, 10216, 10219, 10221, 10225, 10229, 10230, 10235, 10237, 10257, 10260, 10264, 10272, 10274, 10280, 1717, 10307, 10316, 10317, 10319, 10325, 10326, 10328, 10329, 10332, 10334, 10338, 10339, 10341, 10344, 10352, 75895, 43131, 10384, 10386, 43158, 10393, 10405, 10419, 10421, 10425, 207052, 10446, 10447, 10448, 10449, 10450, 43227, 10464, 10479, 10486, 10489, 10491, 10497, 10504, 10514, 10519, 10533, 10535, 10569, 10577, 10583, 10586, 10595, 10604, 10607, 10608, 43383, 10625, 10626, 10636, 76182, 10648, 10654, 10656, 10661, 10664, 10665, 10666, 10671, 10674, 10683, 10686, 10689, 10694, 10698, 10700, 43476, 174554, 10726, 10740, 10749, 10754, 10758, 43527, 10761, 10800, 10811, 10813, 10818, 10830, 10835, 76381, 
10861, 10864, 10877, 174722, 174724, 174734, 10897, 10901, 10906, 76445, 10912, 699042, 174756, 10917, 174765, 10927, 10944, 10961, 10967, 33099, 10981, 11005, 11006, 11026, 11028, 11036, 11045, 11048, 11049, 11078, 11088, 100156, 11128, 11139, 11160, 11163, 11165, 11178, 11190, 11192, 11193, 11204, 11210, 65841, 11219, 11241, 11246, 11258, 11260, 11263, 11268, 11271, 11274, 11277, 11291, 11294, 11312, 11313, 11317, 11331, 11337, 11339, 11349, 11356, 11379, 11402, 11404, 67439, 11439, 11444, 11463, 11467, 11476, 11488, 11491, 44261, 11499, 11500, 11522, 11525, 11535, 77081, 11560, 44336, 11571, 11574, 11577, 11585, 11591, 11607, 11614, 11628, 33121, 11638, 11667, 11671, 11688, 1948, 11690, 11692, 11710, 44484, 11728, 11729, 11730, 11741, 44517, 11751, 11755, 11793, 11794, 11804, 11819, 11827, 11831, 11832, 11851, 11875, 44646, 11886, 11896, 11899, 11921, 11926, 11932, 11937, 1997, 11991, 11998, 12000, 12002, 12003, 12009, 12020, 12023, 12027, 12031, 12038, 12039, 12053, 12087, 12102, 12103, 12106, 44896, 12134, 12159, 12185, 12186, 44957, 12210, 12223, 12237, 12252, 12256, 12257, 12264, 12267, 2046, 12278, 12286, 12294, 12299, 12301, 12304, 12307, 12308, 12318, 12350, 12357, 12368, 12391, 12392, 5538, 12404, 12408, 12411, 12413, 12419, 12423, 12424, 12429, 12431, 12435, 12444, 12456, 2076, 12464, 12472, 12473, 12488, 12505, 12513, 12522, 12527, 78070, 12546, 12549, 12551, 12553, 12573, 12595, 12610, 12618, 12633, 12641, 12649, 12655, 12660, 143752, 5878, 12698, 12700, 45472, 12706, 12717, 12736, 12739, 12742, 12753, 12772, 2131, 12817, 1225234, 12820, 12830, 12844, 12845, 12858, 12872, 12881, 45652, 12885, 12887, 12895, 143973, 12920, 12927, 12936, 12973, 12983, 12984, 12992, 13002, 13016, 13017, 13022, 45861, 13038, 13044, 13053, 13057, 13061, 34945, 13066, 13080, 13089, 13093, 13115, 13137, 13140, 13144, 13161, 13188, 13194, 13195, 45965, 13198, 144271, 13203, 65703, 46014, 13261, 13264, 13274, 13284, 13287, 13294, 13304, 13306, 13317, 13351, 13357, 13367, 13378, 
13382, 111688, 13391, 13392, 13395, 13398, 13406, 13462, 13481, 13486, 13499, 13507, 13549, 13552, 13553, 13556, 13560, 13561, 13575, 13577, 13579, 79125, 13596, 2270, 46411, 13664, 13666, 13691, 79229, 13695, 13717, 13738, 13747, 13759, 13764, 13778, 13798, 13808, 13822, 46592, 177687, 13855, 13856, 13873, 46644, 13877, 13881, 13884, 13895, 144980, 13924, 2323, 13975, 13976, 538281, 13997, 14001, 14016, 14021, 14034, 14036, 14039, 14042, 14050, 14061, 14095, 46880, 14115, 14125, 14129, 79680, 14161, 14183, 14220, 14224, 14245, 14249, 14254, 14264, 14269, 14276, 79818, 14287, 14315, 14336, 14342, 14370, 14378, 14394, 14438, 14445, 14447, 47237, 14474, 14482, 80030, 14503, 14504, 14506, 14512, 14533, 14540, 309453, 14544, 14552, 14575, 14600, 14612, 295092, 14628, 14650, 14663, 14668, 14676, 14677, 14703, 80246, 14727, 113039, 2456, 14750, 14751, 14752, 14759, 14769, 14788, 14798, 14802, 14859, 14860, 14890, 14904, 131432, 14922, 14923, 14947, 14952, 14954, 14964, 15009, 15029, 15069, 47839, 15077, 15085, 15090, 15114, 15116, 35295, 15167, 15174, 15176, 15181, 15207, 15222, 48006, 15267, 15300, 15306, 15322, 867308, 15341, 15359, 15372, 15376, 15381, 15408, 15413, 15428, 15430, 15440, 15452, 15464, 15469, 15478, 15502, 15504, 15510, 15521, 15524, 15529, 15536, 48309, 15547, 15549, 113879, 277722, 15580, 15623, 2604, 15640, 15690, 32838, 15708, 15731, 15737, 48509, 15792, 2635, 15816, 15857, 48639, 15872, 38718, 15914, 15922, 35422, 15930, 15941, 15942, 48716, 48726, 15991, 16014, 147105, 16050, 16055, 81592, 16066, 16067, 16076, 16081, 16087, 16109, 16111, 16112, 16166, 16175, 2701, 16208, 48980, 16218, 16221, 16222, 16233, 101012, 35477, 16256, 16258, 16292, 16297, 13643, 49107, 16346, 16357, 114666, 16363, 16393, 16452, 16459, 16461, 49233, 16468, 16485, 16494, 16500, 2754, 16534, 16551, 114880, 16584, 16606, 16618, 49391, 114928, 16653, 16655, 16704, 16725, 16750, 16762, 16770, 16773, 16817, 16850, 49630, 16865, 16868, 16872, 16875, 16921, 16938, 16946, 16992, 
17002, 17015, 49795, 17028, 17051, 17061, 17064, 49840, 17113, 17114, 17120, 17121, 17161, 17192, 17195, 17212, 17234, 17246, 17247, 38304, 50041, 50047, 17286, 17294, 17307, 50091, 50093, 50095, 115632, 17333, 50104, 17339, 17363, 17375, 17400, 17424, 17440, 30214, 83006, 17495, 17528, 115849, 17550, 50319, 17558, 17561, 50332, 17619, 17635, 17663, 17664, 17669, 17696, 17699, 17717, 17737, 116062, 17793, 17796, 17810, 50581, 17831, 116173, 17921, 41217, 17935, 17949, 17963, 17984, 50790, 295206, 18086, 3017, 18118, 18125, 18148, 116515, 18213, 35804, 35805, 18274, 18284, 33345, 18372, 18412, 18444, 18455, 18460, 18463, 18471, 35849, 18488, 18501, 18526, 18531, 68627, 18548, 18558, 18559, 18580, 18601, 18632, 84194, 18679, 84238, 903445, 18710, 18720, 18727, 18744, 18771, 18801, 18815, 51608, 18846, 18854, 18921, 51691, 18930, 70463, 19015, 19018, 19034, 19055, 19058, 19070, 19074, 19081, 19106, 51875, 19110, 19127, 19131, 19144, 19158, 51946, 8665, 52005, 19238, 215871, 19278, 19296, 19311, 150387, 19316, 19366, 19384, 19394, 19414, 19463, 19467, 19472, 19519, 19571, 85125, 19599, 412853, 19700, 19707, 19720, 19727, 19746, 19747, 52534, 19772, 19809, 19819, 19835, 19891, 118213, 19910, 19920, 19927, 8783, 19938, 19979, 52753, 52785, 20028, 20056, 20064, 20082, 20138, 20139, 20186, 20199, 20246, 20297, 151380, 53087, 20336, 20382, 20392, 20431, 249833, 20464, 20468, 20474, 20482, 20512, 20521, 20529, 53329, 20565, 20800, 20836, 20861, 20863, 20889, 20898, 20899, 20907, 3493, 21018, 21066, 21142, 3526, 3528, 21188, 21240, 21248, 21289, 71634, 54155, 21408, 21415, 21416, 21471, 218104, 21501, 21553, 21558, 316489, 21583, 21589, 21610, 54405, 349322, 21678, 3615, 21695, 21705, 21706, 21710, 21714, 21715, 54492, 21780, 54562, 54596, 21865, 153008, 120319, 22022, 22050, 22051, 22076, 22149, 54923, 22176, 54950, 22187, 22191, 65615, 22215, 22272, 87887, 87922, 22402, 22429, 22490, 153581, 22521, 69290, 22556, 22566, 22590, 22595, 317542, 22633, 22663, 22688, 22693, 
22719, 69324, 22735, 55504, 55505, 907474, 36562, 22782, 22789, 22805, 121120, 55596, 3805, 25662, 121284, 134905, 23011, 23027, 23050, 23057, 55852, 23088, 23140, 23155, 55924, 23164, 23221, 23264, 23280, 23305, 56088, 154427, 23358, 23363, 23388, 36671, 23474, 23486, 23514, 56312, 23550, 89113, 23586, 23619, 23639, 23695, 23705, 23744, 23766, 23781, 23808, 56689, 56707, 56746, 23992, 24007, 24016, 89553, 24080, 25863, 56887, 24153, 56938, 24223, 24251, 22630, 24270, 24365, 24387, 102375, 24464, 24467, 1925074, 24533, 184314, 24595, 24600, 24606, 90171, 69642, 24656, 254078, 32898, 24710, 24717, 9594, 24820, 24851, 24923, 24951, 90491, 24983, 25049, 90642, 25160, 25170, 58019, 26055, 25272, 25281, 647905, 25335, 25343, 221959, 58121, 25409, 58184, 25455, 189322, 25584, 4268, 25657, 58430, 222296, 25735, 9752, 25802, 25812, 91382, 58631, 58633, 58634, 58649, 26012, 288174, 26034, 26042, 124359, 26116, 26168, 26177, 26233, 91787, 48074, 4395, 288395, 26501, 26565, 452639, 26760, 26796, 26804, 26833, 26834, 26879, 59705, 92487, 26957, 420213, 2255233, 42732, 27090, 27171, 27207, 27211, 27215, 92788, 27261, 27267, 60070, 92938, 27421, 27435, 60225, 27541, 191386, 6373, 27556, 60326, 27574, 60358, 27596, 60372, 27661, 60434, 27668, 27689, 60495, 27736, 27753, 27814, 60604, 60613, 27850, 102956, 27939, 126348, 28070, 28073, 257464, 60938, 28215, 93797, 192102, 4711, 28283, 28303, 28417, 61205, 28470, 28473, 61259, 61263, 28517, 28547, 28588, 37544, 28846, 28875, 28924, 61705, 234208, 29044, 61832, 43100, 4889, 29402, 62174, 29421, 29434, 127765, 37679, 33715, 62264, 291656, 29520, 29534, 389995, 28262, 4927, 29586, 29616, 29719, 29783, 29792, 29793, 29806, 29859, 29871, 29951, 29959, 62728, 29971, 30005, 30019, 62790, 62982, 30299, 43282, 30335, 30406, 30412, 30426, 30474, 227134, 30530, 30606, 63383, 5112, 30772, 31009, 31155, 129466, 31177, 31222, 31265, 129595, 31358, 64141, 31375, 5232, 31407, 31431, 31443, 31457, 64247, 33833, 97087, 129981, 31705, 31791, 31795, 
64680, 130220, 5328, 5339, 32086, 32096, 32097, 32141, 32151, 622083, 10845, 65086, 67635, 5394, 163457, 32408, 196309, 65296, 32638, 103745, 32663, 32705, 32720, 32725, 10916"; + String countsOfCountsExampleDataountOfCountsEstimator estimator = CountOfCountsEstimator.regress(doublesFromString(countsExampleData), doublesFromString(countsOfCountsExampleData)); + + Assert.assertEquals(estimator.getSlope(), -2.18515607, 0.000001); + Assert.assertEquals(estimator.getIntercept(), 18.41044205, 0.000001); + } + + @Test + public void testPrediction() { + final double slope = -2.18515607; + final double intercept = 18.41044205; + CountOfCountsEstimator estimator = new CountOfCountsEstimator(slope, intercept); + Assert.assertEquals(estimator.getEstimatedCountOfCount(1), 98981354.304301515, 0.0001); + Assert.assertEquals(estimator.getEstimatedCountOfCount(100), 4219.3082200962008, 0.0001); + Assert.assertEquals(estimator.getRoundedCountOfCount(100), 4219); //non-rounded: 4219.3082200962008 + Assert.assertEquals(estimator.getRoundedCountOfCount(1000), 28); //non-rounded: 27.547689656315786 + } + +} From eb9d61d2bf443d9bed0a7bf2464385360881b396 Mon Sep 17 00:00:00 2001 From: Matt Post Date: Wed, 7 Sep 2016 15:16:49 -0400 Subject: [PATCH 5/6] bugfix: use a single reduce for count of rule counts An attempt at setting this was in the code, but the constant was overriden with a query to the config file. 
--- .../hadoop/features/mapred/CountOfRuleCountsEstimationJob.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/edu/jhu/thrax/hadoop/features/mapred/CountOfRuleCountsEstimationJob.java b/src/edu/jhu/thrax/hadoop/features/mapred/CountOfRuleCountsEstimationJob.java index c54d1fb..b8a5412 100644 --- a/src/edu/jhu/thrax/hadoop/features/mapred/CountOfRuleCountsEstimationJob.java +++ b/src/edu/jhu/thrax/hadoop/features/mapred/CountOfRuleCountsEstimationJob.java @@ -69,8 +69,7 @@ public Job getJob(Configuration conf) throws IOException { job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); - int num_reducers = conf.getInt("thrax.reducers", SINGLE_REDUCER); - job.setNumReduceTasks(num_reducers); + job.setNumReduceTasks(SINGLE_REDUCER); FileInputFormat.setInputPaths(job, new Path(conf.get("thrax.work-dir") + "rules")); FileOutputFormat.setOutputPath(job, new Path(conf.get("thrax.work-dir") + name)); From 1ac59b967abeeeb648abd165894cad6add6c1586 Mon Sep 17 00:00:00 2001 From: Lewis John McGibbney Date: Wed, 5 Oct 2016 21:43:10 -0700 Subject: [PATCH 6/6] Fix compilation issues, comply with forbidden API's discrepancies and implement slf4j-over-log4j logging --- .classpath | 13 - .gitignore | 6 +- .project | 17 - pom.xml | 16 +- src/main/java/edu/jhu/thrax/Thrax.java | 21 +- .../jhu/thrax/datatypes/HierarchicalRule.java | 5 +- .../java/edu/jhu/thrax/datatypes/IntPair.java | 4 +- .../edu/jhu/thrax/datatypes/PhrasePair.java | 365 +++++++++--------- .../ContextPhraseExtractor.java | 12 +- .../jhu/thrax/distributional/FeatureSet.java | 8 +- .../extraction/HierarchicalRuleExtractor.java | 8 +- .../edu/jhu/thrax/extraction/SAMTLabeler.java | 6 +- .../hadoop/distributional/CommonLSH.java | 8 +- .../distributional/ContextWritable.java | 26 +- .../hadoop/extraction/ExtractionMapper.java | 5 +- .../WordLexicalProbabilityCalculator.java | 7 +- ...eGivenTargetLexicalProbabilityFeature.java | 6 +- 
...tGivenSourceLexicalProbabilityFeature.java | 6 +- .../CountOfRuleCountsEstimationJob.java | 135 +++++++ ...moothedSourcePhraseGivenTargetFeature.java | 118 ++++++ ...moothedTargetPhraseGivenSourceFeature.java | 118 ++++++ .../mapred/coc/CountOfCountsEstimator.java | 118 ++++++ .../mapred/coc/GoodTuringSmoother.java | 15 + .../features/mapred/coc/package-info.java | 8 + .../hadoop/jobs/FeatureCollectionJob.java | 6 +- .../edu/jhu/thrax/hadoop/jobs/Scheduler.java | 8 +- .../hadoop/paraphrasing/PivotingReducer.java | 6 +- .../thrax/hadoop/tools/ExtractionTool.java | 14 +- .../jhu/thrax/hadoop/tools/FeatureTool.java | 17 +- .../jhu/thrax/hadoop/tools/OutputTool.java | 12 +- ...rceWordGivenTargetWordProbabilityTool.java | 15 +- ...getWordGivenSourceWordProbabilityTool.java | 16 +- .../thrax/lexprob/HashMapLexprobTable.java | 5 +- .../edu/jhu/thrax/lexprob/LexprobTest.java | 11 +- .../edu/jhu/thrax/lexprob/TableEntry.java | 4 +- .../jhu/thrax/lexprob/TrieLexprobTable.java | 10 +- .../edu/jhu/thrax/syntax/LatticeArray.java | 19 +- .../java/edu/jhu/thrax/syntax/ParseTree.java | 11 +- .../jhu/thrax/tools/ExtractPropbankRules.java | 5 +- .../edu/jhu/thrax/tools/JudgeParaphrases.java | 21 +- .../jhu/thrax/tools/ParaphraseCoverage.java | 32 +- .../jhu/thrax/tools/ParaphraseIntersect.java | 34 +- .../jhu/thrax/tools/ParaphraseOverlap.java | 30 +- .../edu/jhu/thrax/tools/ParaphraseScore.java | 31 +- .../jhu/thrax/tools/ParaphraseWordNet.java | 30 +- .../jhu/thrax/tools/SequenceToGrammar.java | 22 +- .../jhu/thrax/tools/SequenceToSignatures.java | 28 +- .../edu/jhu/thrax/tools/SplitAndFilter.java | 29 +- .../edu/jhu/thrax/util/ConfFileParser.java | 7 +- .../edu/jhu/thrax/util/CreateGlueGrammar.java | 20 +- .../java/edu/jhu/thrax/util/FormatUtils.java | 13 +- .../edu/jhu/thrax/util/GrammarComparison.java | 14 +- .../java/edu/jhu/thrax/util/HdfsUtils.java | 49 +++ .../java/edu/jhu/thrax/util/Intersect.java | 4 +- .../edu/jhu/thrax/util/TestSetFilter.java | 75 ++-- 
.../edu/jhu/thrax/util/io/LineReader.java | 12 +- src/main/resources/log4j.properties | 65 ++++ 57 files changed, 1231 insertions(+), 495 deletions(-) delete mode 100644 .classpath delete mode 100644 .project create mode 100644 src/main/java/edu/jhu/thrax/hadoop/features/mapred/CountOfRuleCountsEstimationJob.java create mode 100644 src/main/java/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedSourcePhraseGivenTargetFeature.java create mode 100644 src/main/java/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedTargetPhraseGivenSourceFeature.java create mode 100644 src/main/java/edu/jhu/thrax/hadoop/features/mapred/coc/CountOfCountsEstimator.java create mode 100644 src/main/java/edu/jhu/thrax/hadoop/features/mapred/coc/GoodTuringSmoother.java create mode 100644 src/main/java/edu/jhu/thrax/hadoop/features/mapred/coc/package-info.java create mode 100644 src/main/java/edu/jhu/thrax/util/HdfsUtils.java create mode 100644 src/main/resources/log4j.properties diff --git a/.classpath b/.classpath deleted file mode 100644 index 83c6ad5..0000000 --- a/.classpath +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - diff --git a/.gitignore b/.gitignore index bd90d3a..216fe8b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,8 @@ test-output/ doc/ AwsCredentials.properties -.DS_Store \ No newline at end of file +.DS_Store +.project +.classpath +.settings/ +target diff --git a/.project b/.project deleted file mode 100644 index 3578d81..0000000 --- a/.project +++ /dev/null @@ -1,17 +0,0 @@ - - - Thrax - - - - - - org.eclipse.jdt.core.javabuilder - - - - - - org.eclipse.jdt.core.javanature - - diff --git a/pom.xml b/pom.xml index 0dc9c97..934c2ff 100644 --- a/pom.xml +++ b/pom.xml @@ -324,9 +324,9 @@ ${scmBranch}@r${buildNumber} - 1.7 - 1.7 - 1.7 + 1.8 + 1.8 + 1.8 yyyy-MM-dd HH:mm:ssZ false ${project.build.finalName} @@ -360,6 +360,16 @@ jerboa 1.0.0 + + org.slf4j + slf4j-api + 1.7.21 + + + org.slf4j + slf4j-log4j12 + 1.7.21 + org.testng testng diff --git 
a/src/main/java/edu/jhu/thrax/Thrax.java b/src/main/java/edu/jhu/thrax/Thrax.java index e3ad644..d333d97 100644 --- a/src/main/java/edu/jhu/thrax/Thrax.java +++ b/src/main/java/edu/jhu/thrax/Thrax.java @@ -12,6 +12,8 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.hadoop.features.annotation.AnnotationFeature; import edu.jhu.thrax.hadoop.features.annotation.AnnotationFeatureFactory; @@ -36,6 +38,7 @@ import edu.jhu.thrax.util.ConfFileParser; public class Thrax extends Configured implements Tool { + private static final Logger LOG = LoggerFactory.getLogger(Thrax.class); private Scheduler scheduler; private Configuration conf; @@ -44,7 +47,7 @@ public class Thrax extends Configured implements Tool { public synchronized int run(String[] argv) throws Exception { if (argv.length < 1) { - System.err.println("usage: Thrax [output path]"); + LOG.error("usage: Thrax [output path]"); return RETURN_CODE_FAILED; } // do some setup of configuration @@ -78,12 +81,10 @@ public synchronized int run(String[] argv) throws Exception { } wait(); } while (scheduler.notFinished()); - System.err.print(scheduler); + LOG.info(scheduler.toString()); if (scheduler.getClassesByState(JobState.SUCCESS).size() == scheduler.numJobs()) { - System.err.println("Work directory was " + workDir); - System.err.println("To retrieve grammar:"); - System.err.println("hadoop fs -getmerge " + conf.get("thrax.outputPath", "") - + " "); + LOG.info("Work directory was {}", workDir); + LOG.info("To retrieve grammar: hadoop fs -getmerge {} ", conf.get("thrax.outputPath", "")); return RETURN_CODE_SUCCESS; } else { return RETURN_CODE_FAILED; @@ -99,7 +100,7 @@ private synchronized void scheduleJobs() throws SchedulerException { String type = conf.get("thrax.type", "translation"); String features = BackwardsCompatibility.equivalent(conf.get("thrax.features", "")); - 
System.err.println("Running in mode: " + type); + LOG.info("Running in mode: {}", type); scheduler.schedule(VocabularyJob.class); @@ -166,7 +167,7 @@ private synchronized void scheduleJobs() throws SchedulerException { scheduler.schedule(DistributionalContextSortingJob.class); scheduler.percolate(DistributionalContextSortingJob.class); } else { - System.err.println("Unknown grammar type. No jobs scheduled."); + LOG.error("Unknown grammar type. No jobs scheduled."); } } @@ -179,7 +180,7 @@ protected synchronized void workerDone(Class theClass, boole try { scheduler.setState(theClass, success ? JobState.SUCCESS : JobState.FAILED); } catch (SchedulerException e) { - System.err.println(e.getMessage()); + LOG.error(e.getMessage()); } notify(); return; @@ -201,7 +202,7 @@ public void run() { job.waitForCompletion(false); thrax.workerDone(theClass, job.isSuccessful()); } catch (Exception e) { - e.printStackTrace(); + LOG.error(e.getMessage()); thrax.workerDone(theClass, false); } return; diff --git a/src/main/java/edu/jhu/thrax/datatypes/HierarchicalRule.java b/src/main/java/edu/jhu/thrax/datatypes/HierarchicalRule.java index a880c2a..1c2e22b 100644 --- a/src/main/java/edu/jhu/thrax/datatypes/HierarchicalRule.java +++ b/src/main/java/edu/jhu/thrax/datatypes/HierarchicalRule.java @@ -3,6 +3,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; +import java.util.Locale; import edu.jhu.thrax.extraction.SpanLabeler; import edu.jhu.thrax.util.Vocabulary; @@ -67,9 +68,9 @@ public HierarchicalRule addNonterminal(PhrasePair pp) { public String toString() { StringBuilder sb = new StringBuilder(); sb.append("HierarchicalRule { "); - sb.append(String.format("lhs:%s ", lhs)); + sb.append(String.format(Locale.ROOT, "lhs:%s ", lhs)); for (int i = 0; i < nts.length; i++) - sb.append(String.format("%d:%s ", i, nts[i])); + sb.append(String.format(Locale.ROOT, "%d:%s ", i, nts[i])); sb.append("}"); return sb.toString(); } diff --git 
a/src/main/java/edu/jhu/thrax/datatypes/IntPair.java b/src/main/java/edu/jhu/thrax/datatypes/IntPair.java index 6ad8c8d..fda8616 100644 --- a/src/main/java/edu/jhu/thrax/datatypes/IntPair.java +++ b/src/main/java/edu/jhu/thrax/datatypes/IntPair.java @@ -1,5 +1,7 @@ package edu.jhu.thrax.datatypes; +import java.util.Locale; + import edu.jhu.thrax.util.FormatUtils; /** @@ -57,7 +59,7 @@ public static IntPair fromHyphenatedString(String s) public String toString() { - return String.format("(%d,%d)", fst, snd); + return String.format(Locale.ROOT, "(%d,%d)", fst, snd); } public boolean equals(Object o) diff --git a/src/main/java/edu/jhu/thrax/datatypes/PhrasePair.java b/src/main/java/edu/jhu/thrax/datatypes/PhrasePair.java index 1496177..5e0a68f 100644 --- a/src/main/java/edu/jhu/thrax/datatypes/PhrasePair.java +++ b/src/main/java/edu/jhu/thrax/datatypes/PhrasePair.java @@ -1,6 +1,7 @@ package edu.jhu.thrax.datatypes; import java.util.Iterator; +import java.util.Locale; import edu.jhu.thrax.extraction.SpanLabeler; @@ -10,188 +11,188 @@ */ public class PhrasePair { - /** - * The index of the start of the source side of this PhrasePair. - */ - public final int sourceStart; - /** - * One plus the index of the end of the source side of this PhrasePair. - */ - public final int sourceEnd; - /** - * The index of the start of the target side of this PhrasePair. - */ - public final int targetStart; - /** - * One plus the index of the end of the target side of this PhrasePair. - */ - public final int targetEnd; - - /** - * Constructor. - * - * @param ss source start - * @param se source end - * @param ts target start - * @param te target end - */ - public PhrasePair(int ss, int se, int ts, int te) - { - sourceStart = ss; - sourceEnd = se; - targetStart = ts; - targetEnd = te; + /** + * The index of the start of the source side of this PhrasePair. + */ + public final int sourceStart; + /** + * One plus the index of the end of the source side of this PhrasePair. 
+ */ + public final int sourceEnd; + /** + * The index of the start of the target side of this PhrasePair. + */ + public final int targetStart; + /** + * One plus the index of the end of the target side of this PhrasePair. + */ + public final int targetEnd; + + /** + * Constructor. + * + * @param ss source start + * @param se source end + * @param ts target start + * @param te target end + */ + public PhrasePair(int ss, int se, int ts, int te) + { + sourceStart = ss; + sourceEnd = se; + targetStart = ts; + targetEnd = te; + } + + /** + * Determines if another PhrasePair is contained (non-strictly) within + * this PhrasePair. Another PhrasePair is contained non-strictly if none + * of its boundary points lie outside of this PhrasePair. + * + * @param other the other PhrasePair + * @return true if other is contained non-strictly in this PhrasePair, + * false if at least one point lies outside + */ + public boolean contains(PhrasePair other) + { + return other.sourceStart >= sourceStart + && other.sourceEnd <= sourceEnd + && other.targetStart >= targetStart + && other.targetEnd <= targetEnd; + } + + /** + * Determine if this PhrasePair can be considered as an initial phrase + * pair according to a particular alignment. A phrase pair is called an + * initial phrase pair if the following conditions are satisfied: + *

+ * 1) no source words are aligned outside the target span of the phrase + * 2) no target words are aligned outside the source span of the phrase + * 3) a certain number of alignment points are present in the phrase pair + *

+ * In addition, we may optionally specify that only the smallest phrase + * pair with the same alignment is kept: that is, we may disallow the + * presence of unaligned words at the edges of the PhrasePair. + * + * @param a the Alignment + * @param allowUnaligned whether to allow unaligned words at the edges of + * initial phrase pairs + * @param minimumAligned the minimum number of alignment points needed + * @return true if this is an initial phrase pair, false otherwise + */ + public boolean isInitialPhrasePair(Alignment a, boolean allowUnaligned, int minimumAligned) + { + int numLinks = 0; + for (int i = sourceStart; i < sourceEnd; i++) { + Iterator js = a.targetIndicesAlignedTo(i); + while (js.hasNext()) { + numLinks++; + int j = js.next(); + if (j < targetStart || j >= targetEnd) + return false; + } } - - /** - * Determines if another PhrasePair is contained (non-strictly) within - * this PhrasePair. Another PhrasePair is contained non-strictly if none - * of its boundary points lie outside of this PhrasePair. - * - * @param other the other PhrasePair - * @return true if other is contained non-strictly in this PhrasePar, - * false if at least one point lies outside - */ - public boolean contains(PhrasePair other) - { - return other.sourceStart >= sourceStart - && other.sourceEnd <= sourceEnd - && other.targetStart >= targetStart - && other.targetEnd <= targetEnd; - } - - /** - * Determine if this PhrasePair can be considered as an initial phrase - * pair according to a particular alignment. A phrase pair is called an - * initial phrase pair if the following conditions are satisfied: - *

- * 1) no source words are aligned outside the target span of the phrase - * 2) no target words are aligned outside the source span of the phrase - * 3) a certain number of alignment points are present in the phrase pair - *

- * In addition, we may optionally specify that only the smallest phrase - * pair with the same alignment is kept: that is, we may disallow the - * presence of unaligned words at the edges of the PhrasePair. - * - * @param a the Alignment - * @param allowUnaligned whether to allow unaligned words at the edges of - * initial phrase pairs - * @param minimumAligned the minimum number of alignment points needed - * @return true if this is an initial phrase pair, false otherwise - */ - public boolean isInitialPhrasePair(Alignment a, boolean allowUnaligned, int minimumAligned) - { - int numLinks = 0; - for (int i = sourceStart; i < sourceEnd; i++) { - Iterator js = a.targetIndicesAlignedTo(i); - while (js.hasNext()) { - numLinks++; - int j = js.next(); - if (j < targetStart || j >= targetEnd) - return false; - } - } - for (int j = targetStart; j < targetEnd; j++) { - Iterator is = a.sourceIndicesAlignedTo(j); - while (is.hasNext()) { - numLinks++; - int i = is.next(); - if (i < sourceStart || i >= sourceEnd) - return false; - } - } - return numLinks >= minimumAligned && (allowUnaligned || isMinimal(a)); - } - - private boolean isMinimal(Alignment a) - { - return a.sourceIndexIsAligned(sourceStart) - && a.sourceIndexIsAligned(sourceEnd - 1) - && a.targetIndexIsAligned(targetStart) - && a.targetIndexIsAligned(targetEnd - 1); - } - - public int sourceLength() - { - return sourceEnd - sourceStart; - } - - public int targetLength() - { - return targetEnd - targetStart; - } - - public int numAlignmentPoints(Alignment a) - { - if (sourceLength() < targetLength()) - return countAlignmentPointsSource(a); - else - return countAlignmentPointsTarget(a); - } - - private int countAlignmentPointsSource(Alignment a) - { - int result = 0; - for (int i = sourceStart; i < sourceEnd; i++) - result += a.numTargetWordsAlignedTo(i); - return result; - } - - private int countAlignmentPointsTarget(Alignment a) - { - int result = 0; - for (int j = targetStart; j < targetEnd; j++) - result += 
a.numSourceWordsAlignedTo(j); - return result; - } - - public String toString() - { - return String.format("[%d,%d)+[%d,%d)", sourceStart, sourceEnd, targetStart, targetEnd); - } - - public boolean equals(Object o) - { - if (this == o) - return true; - if (!(o instanceof PhrasePair)) - return false; - PhrasePair p = (PhrasePair) o; - return sourceStart == p.sourceStart - && sourceEnd == p.sourceEnd - && targetStart == p.targetStart - && targetEnd == p.targetEnd; + for (int j = targetStart; j < targetEnd; j++) { + Iterator is = a.sourceIndicesAlignedTo(j); + while (is.hasNext()) { + numLinks++; + int i = is.next(); + if (i < sourceStart || i >= sourceEnd) + return false; + } } - - public int hashCode() - { - int result = 37; - result *= 163 + sourceStart; - result *= 163 + sourceEnd; - result *= 163 + targetStart; - result *= 163 + targetEnd; - return result; - } - - public int getLabel(SpanLabeler labeler, boolean useSource) - { - if (useSource) - return labeler.getLabel(sourceStart, sourceEnd); - else - return labeler.getLabel(targetStart, targetEnd); - } - - public boolean sourceIsDisjointFrom(PhrasePair other) - { - if (other.sourceStart < sourceStart) - return other.sourceEnd <= sourceStart; - return other.sourceStart >= sourceEnd; - } - - public boolean targetIsDisjointFrom(PhrasePair other) - { - if (other.targetStart < targetStart) - return other.targetEnd <= targetStart; - return other.targetStart >= targetEnd; - } + return numLinks >= minimumAligned && (allowUnaligned || isMinimal(a)); + } + + private boolean isMinimal(Alignment a) + { + return a.sourceIndexIsAligned(sourceStart) + && a.sourceIndexIsAligned(sourceEnd - 1) + && a.targetIndexIsAligned(targetStart) + && a.targetIndexIsAligned(targetEnd - 1); + } + + public int sourceLength() + { + return sourceEnd - sourceStart; + } + + public int targetLength() + { + return targetEnd - targetStart; + } + + public int numAlignmentPoints(Alignment a) + { + if (sourceLength() < targetLength()) + return 
countAlignmentPointsSource(a); + else + return countAlignmentPointsTarget(a); + } + + private int countAlignmentPointsSource(Alignment a) + { + int result = 0; + for (int i = sourceStart; i < sourceEnd; i++) + result += a.numTargetWordsAlignedTo(i); + return result; + } + + private int countAlignmentPointsTarget(Alignment a) + { + int result = 0; + for (int j = targetStart; j < targetEnd; j++) + result += a.numSourceWordsAlignedTo(j); + return result; + } + + public String toString() + { + return String.format(Locale.ROOT, "[%d,%d)+[%d,%d)", sourceStart, sourceEnd, targetStart, targetEnd); + } + + public boolean equals(Object o) + { + if (this == o) + return true; + if (!(o instanceof PhrasePair)) + return false; + PhrasePair p = (PhrasePair) o; + return sourceStart == p.sourceStart + && sourceEnd == p.sourceEnd + && targetStart == p.targetStart + && targetEnd == p.targetEnd; + } + + public int hashCode() + { + int result = 37; + result *= 163 + sourceStart; + result *= 163 + sourceEnd; + result *= 163 + targetStart; + result *= 163 + targetEnd; + return result; + } + + public int getLabel(SpanLabeler labeler, boolean useSource) + { + if (useSource) + return labeler.getLabel(sourceStart, sourceEnd); + else + return labeler.getLabel(targetStart, targetEnd); + } + + public boolean sourceIsDisjointFrom(PhrasePair other) + { + if (other.sourceStart < sourceStart) + return other.sourceEnd <= sourceStart; + return other.sourceStart >= sourceEnd; + } + + public boolean targetIsDisjointFrom(PhrasePair other) + { + if (other.targetStart < targetStart) + return other.targetEnd <= targetStart; + return other.targetStart >= targetEnd; + } } diff --git a/src/main/java/edu/jhu/thrax/distributional/ContextPhraseExtractor.java b/src/main/java/edu/jhu/thrax/distributional/ContextPhraseExtractor.java index 448bf1f..da5a83d 100644 --- a/src/main/java/edu/jhu/thrax/distributional/ContextPhraseExtractor.java +++ b/src/main/java/edu/jhu/thrax/distributional/ContextPhraseExtractor.java 
@@ -3,6 +3,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Locale; import java.util.TreeMap; import org.apache.commons.lang3.StringEscapeUtils; @@ -10,6 +11,8 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.syntax.LatticeArray; import edu.jhu.thrax.util.FormatUtils; @@ -20,6 +23,7 @@ @SuppressWarnings("unchecked") public class ContextPhraseExtractor { + private static final Logger LOG = LoggerFactory.getLogger(ContextPhraseExtractor.class); private final String G = "_"; @@ -142,12 +146,12 @@ public List extract(String input) throws MalformedInputException if (inputs.length < 6) throw new NotEnoughFieldsException(); parse = new LatticeArray(inputs[0].trim(), true); - lemma = FormatUtils.P_SPACE.split(inputs[1].trim().toLowerCase()); + lemma = FormatUtils.P_SPACE.split(inputs[1].trim().toLowerCase(Locale.ROOT)); size = lemma.length; if (size != parse.size()) throw new MalformedInputException(); - String[] ner_entries = FormatUtils.P_SPACE.split(inputs[2].trim().toLowerCase()); + String[] ner_entries = FormatUtils.P_SPACE.split(inputs[2].trim().toLowerCase(Locale.ROOT)); ner = new String[ner_entries.length]; if (ner.length != size) throw new MalformedInputException("NER: " + ner.length + " vs. 
Size: " + size); @@ -196,7 +200,7 @@ public List extract(String input) throws MalformedInputException } } } catch (Exception e) { - e.printStackTrace(); + LOG.error(e.getMessage()); throw new MalformedInputException(); } return output; @@ -320,7 +324,7 @@ public static void main(String[] args) throws Exception { TreeMap feature_map = new TreeMap(); for (Writable fn : cp.getFeatures().keySet()) feature_map.put((Text) fn, ((IntWritable) cp.getFeatures().get(fn)).get()); - System.out.println(FormatUtils.contextPhraseToText(cp.getPhrase(), feature_map)); + LOG.info("{} {}", FormatUtils.contextPhraseToText(cp.getPhrase(), feature_map)); } } } diff --git a/src/main/java/edu/jhu/thrax/distributional/FeatureSet.java b/src/main/java/edu/jhu/thrax/distributional/FeatureSet.java index 7cf3bec..6572bc8 100644 --- a/src/main/java/edu/jhu/thrax/distributional/FeatureSet.java +++ b/src/main/java/edu/jhu/thrax/distributional/FeatureSet.java @@ -3,12 +3,16 @@ import java.util.HashSet; import java.util.Set; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import edu.jhu.thrax.distributional.FeatureTypes.Label; import edu.jhu.thrax.distributional.FeatureTypes.Type; import edu.jhu.thrax.util.FormatUtils; - public class FeatureSet { + + private static final Logger LOG = LoggerFactory.getLogger(FeatureSet.class); private Set features; @@ -22,7 +26,7 @@ public FeatureSet() { public void addFeatureClass(String entry) { String[] fields = FormatUtils.P_DASH.split(entry); for (String f : fields) { - System.err.println(f); + LOG.info(f); } } diff --git a/src/main/java/edu/jhu/thrax/extraction/HierarchicalRuleExtractor.java b/src/main/java/edu/jhu/thrax/extraction/HierarchicalRuleExtractor.java index e077e0b..4522e02 100644 --- a/src/main/java/edu/jhu/thrax/extraction/HierarchicalRuleExtractor.java +++ b/src/main/java/edu/jhu/thrax/extraction/HierarchicalRuleExtractor.java @@ -5,6 +5,9 @@ import java.util.List; import java.util.Scanner; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + import edu.jhu.thrax.datatypes.Alignment; import edu.jhu.thrax.datatypes.ArrayAlignment; import edu.jhu.thrax.datatypes.HierarchicalRule; @@ -28,6 +31,7 @@ public class HierarchicalRuleExtractor { private boolean allowMixed = true; private boolean allowFullSentenceRules = true; private PhrasePair fullSentencePhrasePair; + private static final Logger LOG = LoggerFactory.getLogger(HierarchicalRuleExtractor.class); public HierarchicalRuleExtractor() { // just use the defaults! @@ -182,9 +186,9 @@ public static void main(String[] argv) throws IOException { Alignment alignment = ArrayAlignment.fromString(parts[2], false); for (HierarchicalRule r : extractor.extract(source.length, target.length, alignment)) { if (labeler != null) - System.out.println(r.toString(source, target, labeler, true)); + LOG.info(r.toString(source, target, labeler, true)); else - System.out.println(r); + LOG.info(r.toString()); } } } diff --git a/src/main/java/edu/jhu/thrax/extraction/SAMTLabeler.java b/src/main/java/edu/jhu/thrax/extraction/SAMTLabeler.java index 613cee1..4400976 100644 --- a/src/main/java/edu/jhu/thrax/extraction/SAMTLabeler.java +++ b/src/main/java/edu/jhu/thrax/extraction/SAMTLabeler.java @@ -2,6 +2,9 @@ import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import edu.jhu.thrax.syntax.ParseTree; import edu.jhu.thrax.util.Vocabulary; @@ -12,6 +15,7 @@ public class SAMTLabeler implements SpanLabeler { private boolean allowConcat = true; private boolean allowDoubleConcat = true; private UnaryCategoryHandler unaryCategoryHandler; + private static final Logger LOG = LoggerFactory.getLogger(SAMTLabeler.class); private ParseTree tree; private int defaultLabel; @@ -25,7 +29,7 @@ public SAMTLabeler(String parse, boolean constituent, boolean ccg, boolean conca defaultLabel = def; unaryCategoryHandler = UnaryCategoryHandler.fromString(unary); tree = ParseTree.fromPennFormat(parse); - if (tree == null) System.err.printf("WARNING: 
SAMT labeler: %s is not a parse tree\n", parse); + if (tree == null) LOG.error("WARNING: SAMT labeler: {} is not a parse tree\n", parse); } public int getLabel(int from, int to) { diff --git a/src/main/java/edu/jhu/thrax/hadoop/distributional/CommonLSH.java b/src/main/java/edu/jhu/thrax/hadoop/distributional/CommonLSH.java index 03471ad..b6ea4bd 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/distributional/CommonLSH.java +++ b/src/main/java/edu/jhu/thrax/hadoop/distributional/CommonLSH.java @@ -1,19 +1,21 @@ package edu.jhu.thrax.hadoop.distributional; import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.jerboa.sim.SLSH; public class CommonLSH { + + private static final Logger LOG = LoggerFactory.getLogger(CommonLSH.class); public static SLSH getSLSH(Configuration conf) { SLSH slsh = null; try { slsh = new SLSH(true); - //slsh.initialize(conf.getInt("thrax.lsh-num-bits", 256), - // conf.getInt("thrax.lsh-pool-size", 100000), conf.getInt("thrax.lsh-random-seed", 42)); } catch (Exception e) { - e.printStackTrace(); + LOG.error(e.getMessage()); System.exit(1); } return slsh; diff --git a/src/main/java/edu/jhu/thrax/hadoop/distributional/ContextWritable.java b/src/main/java/edu/jhu/thrax/hadoop/distributional/ContextWritable.java index 93edf68..02ff557 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/distributional/ContextWritable.java +++ b/src/main/java/edu/jhu/thrax/hadoop/distributional/ContextWritable.java @@ -67,12 +67,12 @@ public void merge(ContextWritable that, SLSH slsh) { this.mergeSums(that, slsh); } else { for (Writable feature_text : that.map.keySet()) { - int feature_value = ((IntWritable) that.map.get(feature_text)).get(); - IntWritable current_value = (IntWritable) this.map.get(feature_text); - if (current_value != null) - this.map.put(feature_text, new IntWritable(current_value.get() + feature_value)); + int featureValue = ((IntWritable) that.map.get(feature_text)).get(); + IntWritable 
currentValue = (IntWritable) this.map.get(feature_text); + if (currentValue != null) + this.map.put(feature_text, new IntWritable(currentValue.get() + featureValue)); else - this.map.put(feature_text, new IntWritable(feature_value)); + this.map.put(feature_text, new IntWritable(featureValue)); } } } @@ -83,20 +83,20 @@ private void mergeSums(ContextWritable that, SLSH slsh) { if (!that.compacted.get()) { throw new RuntimeException("Trying to merge sums on un-compacted ContextWritable."); } - Signature this_signature = new Signature(); - Signature that_signature = new Signature(); + Signature thisSignature = new Signature(); + Signature thatSignature = new Signature(); // TODO: probably needs deep copy. - this_signature.sums = sums; - that_signature.sums = sums; - slsh.update(this_signature.toString(), that_signature); + thisSignature.sums = sums; + thatSignature.sums = sums; + slsh.update(thisSignature.toString(), thatSignature.toString(), 1.0); } public void compact(SLSH slsh) { Signature signature = new Signature(); - slsh.initializeSignature(signature); + slsh.signatures.put(signature.toString(), signature); for (Writable feature_name : map.keySet()) { - slsh.updateSignature(signature, ((Text) feature_name).toString(), - ((IntWritable) map.get(feature_name)).get(), 1); + slsh.update(signature.toString(), ((Text) feature_name).toString(), + ((IntWritable) map.get(feature_name)).get()); } compacted.set(true); map = null; diff --git a/src/main/java/edu/jhu/thrax/hadoop/extraction/ExtractionMapper.java b/src/main/java/edu/jhu/thrax/hadoop/extraction/ExtractionMapper.java index 3c50bc3..a0ee03e 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/extraction/ExtractionMapper.java +++ b/src/main/java/edu/jhu/thrax/hadoop/extraction/ExtractionMapper.java @@ -6,12 +6,15 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import 
edu.jhu.thrax.hadoop.datatypes.AlignedRuleWritable; import edu.jhu.thrax.hadoop.datatypes.Annotation; import edu.jhu.thrax.util.Vocabulary; public class ExtractionMapper extends Mapper { + private static final Logger LOG = LoggerFactory.getLogger(ExtractionMapper.class); private RuleWritableExtractor extractor; protected void setup(Context context) throws IOException, InterruptedException { @@ -22,7 +25,7 @@ protected void setup(Context context) throws IOException, InterruptedException { // TODO: static initializer call for what Annotation actually carries would go here. extractor = RuleWritableExtractorFactory.create(context); if (extractor == null) { - System.err.println("WARNING: could not create rule extractor as configured!"); + LOG.error("WARNING: could not create rule extractor as configured!"); } } diff --git a/src/main/java/edu/jhu/thrax/hadoop/features/WordLexicalProbabilityCalculator.java b/src/main/java/edu/jhu/thrax/hadoop/features/WordLexicalProbabilityCalculator.java index 7dcf747..3460e71 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/features/WordLexicalProbabilityCalculator.java +++ b/src/main/java/edu/jhu/thrax/hadoop/features/WordLexicalProbabilityCalculator.java @@ -3,7 +3,6 @@ import java.io.IOException; import java.util.HashMap; import java.util.Iterator; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.FloatWritable; @@ -13,6 +12,8 @@ import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Partitioner; import org.apache.hadoop.mapreduce.Reducer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.datatypes.AlignedSentencePair; import edu.jhu.thrax.datatypes.Alignment; @@ -24,6 +25,7 @@ public class WordLexicalProbabilityCalculator extends Configured { public static final long UNALIGNED = 0x0000000000000000L; public static final long MARGINAL = 0x0000000000000000L; + private static final Logger LOG = 
LoggerFactory.getLogger(WordLexicalProbabilityCalculator.class); public static class Map extends Mapper { private HashMap counts = new HashMap(); @@ -130,7 +132,8 @@ public int getPartition(LongWritable key, IntWritable value, int numPartitions) final int num_elements_per_partition = (int) Math.ceil(max_trg_plus_one / (1.0 * numPartitions)); int trg = ((int) (key.get() >> 32) & Integer.MAX_VALUE); if (trg < 0 || trg >= max_trg_plus_one) { - throw new RuntimeException(String.format("Word id %d out of range %d %d", trg, 0, max_trg_plus_one-1)); + LOG.error("Word id {} out of range {} {}", trg, 0, max_trg_plus_one-1); + throw new RuntimeException(); } return trg / num_elements_per_partition; } diff --git a/src/main/java/edu/jhu/thrax/hadoop/features/annotation/SourceGivenTargetLexicalProbabilityFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/SourceGivenTargetLexicalProbabilityFeature.java index ae7e0fd..a829d8d 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/features/annotation/SourceGivenTargetLexicalProbabilityFeature.java +++ b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/SourceGivenTargetLexicalProbabilityFeature.java @@ -9,6 +9,8 @@ import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.Reducer.Context; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.hadoop.datatypes.AlignmentWritable; import edu.jhu.thrax.hadoop.datatypes.Annotation; @@ -20,6 +22,8 @@ @SuppressWarnings("rawtypes") public class SourceGivenTargetLexicalProbabilityFeature implements AnnotationFeature { + + private static final Logger LOG = LoggerFactory.getLogger(SourceGivenTargetLexicalProbabilityFeature.class); public static final String NAME = "f_given_e_lex"; public static final String LABEL = "Lex(f|e)"; @@ -82,7 +86,7 @@ private float sourceGivenTarget(RuleWritable rule, AlignmentWritable f2e) { float p = table.get(target[e], source[f]); prob += (p < 0 ? 
DEFAULT_PROB : p); if (p < 0) - System.err.printf("WARNING: could not read lexprob p(%s|%s)\n", Vocabulary.word(source[f]), + LOG.warn("WARNING: could not read lexprob p({}|{})\n", Vocabulary.word(source[f]), Vocabulary.word(target[e])); } if (m != 0) diff --git a/src/main/java/edu/jhu/thrax/hadoop/features/annotation/TargetGivenSourceLexicalProbabilityFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/TargetGivenSourceLexicalProbabilityFeature.java index 7a5599e..08a8b96 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/features/annotation/TargetGivenSourceLexicalProbabilityFeature.java +++ b/src/main/java/edu/jhu/thrax/hadoop/features/annotation/TargetGivenSourceLexicalProbabilityFeature.java @@ -9,6 +9,8 @@ import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.Reducer.Context; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.hadoop.datatypes.AlignmentWritable; import edu.jhu.thrax.hadoop.datatypes.Annotation; @@ -20,6 +22,8 @@ @SuppressWarnings("rawtypes") public class TargetGivenSourceLexicalProbabilityFeature implements AnnotationFeature { + + private static final Logger LOG = LoggerFactory.getLogger(TargetGivenSourceLexicalProbabilityFeature.class); public static final String NAME = "e_given_f_lex"; public static final String LABEL = "Lex(e|f)"; @@ -83,7 +87,7 @@ private float targetGivenSource(RuleWritable rule, AlignmentWritable e2f) { float p = table.get(source[f], target[e]); prob += (p < 0 ? 
DEFAULT_PROB : p); if (p <= 0) - System.err.printf("WARNING: could not read lexprob p(%s|%s)\n", Vocabulary.word(target[e]), + LOG.warn("WARNING: could not read lexprob p({}|{})\n", Vocabulary.word(target[e]), Vocabulary.word(source[f])); } if (m != 0) diff --git a/src/main/java/edu/jhu/thrax/hadoop/features/mapred/CountOfRuleCountsEstimationJob.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/CountOfRuleCountsEstimationJob.java new file mode 100644 index 0000000..aae593e --- /dev/null +++ b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/CountOfRuleCountsEstimationJob.java @@ -0,0 +1,135 @@ +package edu.jhu.thrax.hadoop.features.mapred; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Locale; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; +import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import edu.jhu.thrax.hadoop.datatypes.Annotation; +import edu.jhu.thrax.hadoop.datatypes.RuleWritable; +import edu.jhu.thrax.hadoop.features.mapred.coc.CountOfCountsEstimator; +import edu.jhu.thrax.hadoop.jobs.ExtractionJob; +import edu.jhu.thrax.hadoop.jobs.ThraxJob; +import edu.jhu.thrax.util.HdfsUtils; + +@SuppressWarnings("rawtypes") +public class CountOfRuleCountsEstimationJob implements ThraxJob { + + private static final Logger LOG = LoggerFactory.getLogger(CountOfRuleCountsEstimationJob.class); + + //single reducer, as 
this is where we carry out the regression for which we need all data in a central location + private static final int SINGLE_REDUCER = 1; + + public static final String NAME = "rule_count_of_counts"; + + public static final String COUNT_OF_COUNT_ESTIMATOR_OUTPUT_PATH = "count-of-counts-estimator"; + + @Override + public String getName() { + return NAME; + } + + @Override + public String getOutputSuffix() { + return getName(); + } + + @Override + public Set> getPrerequisites() { + Set> result = new HashSet<>(); + result.add(ExtractionJob.class); + return result; + } + + @Override + public Job getJob(Configuration conf) throws IOException { + String name = getName(); + Job job = Job.getInstance(conf, name); + job.setJarByClass(this.getClass()); + + job.setMapperClass(this.mapperClass()); + job.setCombinerClass(IntSumReducer.class); + job.setReducerClass(CountOfCountsRegressionReducer.class); + + job.setInputFormatClass(SequenceFileInputFormat.class); + job.setMapOutputKeyClass(IntWritable.class); + job.setMapOutputValueClass(IntWritable.class); + + job.setOutputFormatClass(SequenceFileOutputFormat.class); + job.setOutputKeyClass(IntWritable.class); + job.setOutputValueClass(IntWritable.class); + + job.setNumReduceTasks(SINGLE_REDUCER); + + FileInputFormat.setInputPaths(job, new Path(conf.get("thrax.work-dir") + "rules")); + FileOutputFormat.setOutputPath(job, new Path(conf.get("thrax.work-dir") + name)); + return job; + } + + public Class mapperClass() { + return CustomMap.class; + } + + private static class CustomMap extends Mapper { + + private final static IntWritable ONE = new IntWritable(1); + + protected void map(RuleWritable key, Annotation value, Context context) throws IOException, + InterruptedException { + IntWritable count = new IntWritable(value.count()); + context.write(count, ONE); + } + } + + /** + * Writes counts of counts and produces a linear regression of the log-log plot of the data. 
+ */ + private static class CountOfCountsRegressionReducer extends IntSumReducer { + + private Map countOfCounts = new HashMap<>(); + + public void reduce(IntWritable key, Iterable values, Context context) + throws IOException, InterruptedException { + if (countOfCounts.containsKey(key.get())) { + throw new RuntimeException( + String.format(Locale.ROOT, "Duplicate key %d in counts of counts.", key.get())); + } + + int sum = 0; + for (IntWritable val : values) { + sum += val.get(); + } + countOfCounts.put(key.get(), sum); + } + + @Override + protected void cleanup( + Reducer.Context context) + throws IOException, InterruptedException { + CountOfCountsEstimator estimator = CountOfCountsEstimator.regress(countOfCounts); + LOG.error("Created CountOfCountsEstimator with slope {} and intercept {}", + estimator.getSlope(), estimator.getIntercept()); + + Configuration conf = context.getConfiguration(); + Path outPath = new Path(conf.getRaw("thrax.work-dir"), COUNT_OF_COUNT_ESTIMATOR_OUTPUT_PATH); + + HdfsUtils.writeObjectToFs(conf, estimator, outPath); + } + } +} \ No newline at end of file diff --git a/src/main/java/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedSourcePhraseGivenTargetFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedSourcePhraseGivenTargetFeature.java new file mode 100644 index 0000000..610b0d7 --- /dev/null +++ b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedSourcePhraseGivenTargetFeature.java @@ -0,0 +1,118 @@ +package edu.jhu.thrax.hadoop.features.mapred; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparator; +import org.apache.hadoop.mapreduce.Mapper; +import 
org.apache.hadoop.mapreduce.Partitioner; +import org.apache.hadoop.mapreduce.Reducer; + +import edu.jhu.thrax.hadoop.comparators.PrimitiveArrayMarginalComparator; +import edu.jhu.thrax.hadoop.datatypes.FeaturePair; +import edu.jhu.thrax.hadoop.datatypes.PrimitiveUtils; +import edu.jhu.thrax.hadoop.datatypes.RuleWritable; +import edu.jhu.thrax.hadoop.features.mapred.coc.CountOfCountsEstimator; +import edu.jhu.thrax.hadoop.features.mapred.coc.GoodTuringSmoother; +import edu.jhu.thrax.hadoop.jobs.ThraxJob; +import edu.jhu.thrax.util.HdfsUtils; +import edu.jhu.thrax.util.Vocabulary; + +@SuppressWarnings("rawtypes") +public class GoodTuringSmoothedSourcePhraseGivenTargetFeature extends MapReduceFeature { + + public static final String NAME = "f_given_e_phrase_gt_smoothed"; + public static final String LABEL = "p_gt(f|e)"; + + public String getName() { + return NAME; + } + + public String getLabel() { + return LABEL; + } + + @Override + public Set> getPrerequisites() { + Set> parentPrerequisites = super.getPrerequisites(); + Set> prerequisites = new HashSet>(parentPrerequisites.size()+1); + prerequisites.add(CountOfRuleCountsEstimationJob.class); + return prerequisites; + } + + public Class sortComparatorClass() { + return SourcePhraseGivenTargetFeature.Comparator.class; + } + + public Class partitionerClass() { + return RuleWritable.TargetPartitioner.class; + } + + public Class mapperClass() { + return SourcePhraseGivenTargetFeature.Map.class; + } + + public Class reducerClass() { + return Reduce.class; + } + + private static class Reduce extends Reducer { + private int marginal; + private FloatWritable prob; + + private GoodTuringSmoother goodTuringSmoother; + + protected void setup(Context context) throws IOException, InterruptedException { + Configuration conf = context.getConfiguration(); + String vocabulary_path = conf.getRaw("thrax.work-dir") + "vocabulary/part-*"; + Vocabulary.initialize(conf, vocabulary_path); + + Path inPath = new 
Path(conf.getRaw("thrax.work-dir"), + CountOfRuleCountsEstimationJob.COUNT_OF_COUNT_ESTIMATOR_OUTPUT_PATH); + try { + goodTuringSmoother = new GoodTuringSmoother(HdfsUtils.readObjectFromFs(conf, inPath)); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + protected void reduce(RuleWritable key, Iterable values, Context context) + throws IOException, InterruptedException { + if (Arrays.equals(key.source, PrimitiveArrayMarginalComparator.MARGINAL)) { + marginal = 0; + for (IntWritable x : values) + marginal += x.get(); + return; + } + if (key.lhs == PrimitiveUtils.MARGINAL_ID) { + int count = 0; + for (IntWritable x : values) + count += x.get(); + + double smoothedCount = goodTuringSmoother.smoothedCount(count); + + prob = new FloatWritable((float) -Math.log(smoothedCount / (float) marginal)); + return; + } + context.write(key, new FeaturePair(Vocabulary.id(LABEL), prob)); + } + + } + + private static final FloatWritable ZERO = new FloatWritable(0.0f); + + public void unaryGlueRuleScore(int nt, java.util.Map map) { + map.put(Vocabulary.id(LABEL), ZERO); + } + + public void binaryGlueRuleScore(int nt, java.util.Map map) { + map.put(Vocabulary.id(LABEL), ZERO); + } +} diff --git a/src/main/java/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedTargetPhraseGivenSourceFeature.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedTargetPhraseGivenSourceFeature.java new file mode 100644 index 0000000..ede8b69 --- /dev/null +++ b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/GoodTuringSmoothedTargetPhraseGivenSourceFeature.java @@ -0,0 +1,118 @@ +package edu.jhu.thrax.hadoop.features.mapred; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Writable; +import 
org.apache.hadoop.io.WritableComparator; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Partitioner; +import org.apache.hadoop.mapreduce.Reducer; + +import edu.jhu.thrax.hadoop.comparators.PrimitiveArrayMarginalComparator; +import edu.jhu.thrax.hadoop.datatypes.FeaturePair; +import edu.jhu.thrax.hadoop.datatypes.PrimitiveUtils; +import edu.jhu.thrax.hadoop.datatypes.RuleWritable; +import edu.jhu.thrax.hadoop.features.mapred.coc.CountOfCountsEstimator; +import edu.jhu.thrax.hadoop.features.mapred.coc.GoodTuringSmoother; +import edu.jhu.thrax.hadoop.jobs.ThraxJob; +import edu.jhu.thrax.util.HdfsUtils; +import edu.jhu.thrax.util.Vocabulary; + +@SuppressWarnings("rawtypes") +public class GoodTuringSmoothedTargetPhraseGivenSourceFeature extends MapReduceFeature { + + public static final String NAME = "e_given_f_phrase_gt_smoothed"; + public static final String LABEL = "p_gt(e|f)"; + + public String getName() { + return NAME; + } + + public String getLabel() { + return LABEL; + } + + @Override + public Set> getPrerequisites() { + Set> parentPrerequisites = super.getPrerequisites(); + Set> prerequisites = new HashSet>(parentPrerequisites); + prerequisites.add(CountOfRuleCountsEstimationJob.class); + return prerequisites; + } + + public Class sortComparatorClass() { + return TargetPhraseGivenSourceFeature.Comparator.class; + } + + public Class partitionerClass() { + return RuleWritable.SourcePartitioner.class; + } + + public Class mapperClass() { + return TargetPhraseGivenSourceFeature.Map.class; + } + + public Class reducerClass() { + return Reduce.class; + } + + private static class Reduce extends Reducer { + private int marginal; + private FloatWritable prob; + + private GoodTuringSmoother goodTuringSmoother; + + protected void setup(Context context) throws IOException, InterruptedException { + Configuration conf = context.getConfiguration(); + String vocabulary_path = conf.getRaw("thrax.work-dir") + "vocabulary/part-*"; + 
Vocabulary.initialize(conf, vocabulary_path); + + Path inPath = new Path(conf.getRaw("thrax.work-dir"), + CountOfRuleCountsEstimationJob.COUNT_OF_COUNT_ESTIMATOR_OUTPUT_PATH); + try { + goodTuringSmoother = new GoodTuringSmoother(HdfsUtils.readObjectFromFs(conf, inPath)); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + protected void reduce(RuleWritable key, Iterable values, Context context) + throws IOException, InterruptedException { + if (Arrays.equals(key.target, PrimitiveArrayMarginalComparator.MARGINAL)) { + marginal = 0; + for (IntWritable x : values) + marginal += x.get(); + return; + } + if (key.lhs == PrimitiveUtils.MARGINAL_ID) { + int count = 0; + for (IntWritable x : values) + count += x.get(); + + double smoothedCount = goodTuringSmoother.smoothedCount(count); + + prob = new FloatWritable((float) -Math.log(smoothedCount / (float) marginal)); + return; + } + context.write(key, new FeaturePair(Vocabulary.id(LABEL), prob)); + } + + } + + private static final FloatWritable ZERO = new FloatWritable(0.0f); + + public void unaryGlueRuleScore(int nt, java.util.Map map) { + map.put(Vocabulary.id(LABEL), ZERO); + } + + public void binaryGlueRuleScore(int nt, java.util.Map map) { + map.put(Vocabulary.id(LABEL), ZERO); + } +} diff --git a/src/main/java/edu/jhu/thrax/hadoop/features/mapred/coc/CountOfCountsEstimator.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/coc/CountOfCountsEstimator.java new file mode 100644 index 0000000..c87dc5e --- /dev/null +++ b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/coc/CountOfCountsEstimator.java @@ -0,0 +1,118 @@ +// Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. +package edu.jhu.thrax.hadoop.features.mapred.coc; + +import static java.lang.Math.*; + +import java.io.Serializable; +import java.util.Map; +import java.util.Map.Entry; + +/** + * Linear estimator of count of counts in log-log space. 
+ * + * y = exp(slope * log(count) + intercept) + */ +public class CountOfCountsEstimator implements Serializable { + + private static final long serialVersionUID = -7988102132725579097L; + + private final double slope; + private final double intercept; + + public CountOfCountsEstimator(double slope, double intercept) { + this.slope = slope; + this.intercept = intercept; + } + + public double getSlope() { + return slope; + } + + public double getIntercept() { + return intercept; + } + + public long getRoundedCountOfCount(long count) { + return Math.round(getEstimatedCountOfCount(count)); + } + + public double getEstimatedCountOfCount(long count) { + return exp(slope * log(count) + intercept); + } + + public static CountOfCountsEstimator regress(Map countOfCountsMap) { + double[] counts = new double[countOfCountsMap.size()]; + double[] countOfCounts = new double[countOfCountsMap.size()]; + int idx = 0; + for (Entry e : countOfCountsMap.entrySet()) { + counts[idx] = e.getKey(); + countOfCounts[idx] = e.getValue(); + idx += 1; + } + return regress(counts, countOfCounts); + } + + /** + * Weighted least squares regression in log-log space of count of counts data. + * + * We can solve this with OLS by scaling the input data with sqrt(weight). + * As the weight we use the count of counts of each data point. + * That is, the more often a count appears, the more weight it gets. 
+ * + * We get: + * x1: sqrt(counts of counts) + * x2: sqrt(counts of counts) * log(counts) + * y: sqrt(counts of counts) * log(counts of counts) + * w: weighted by x (the counts) + * + * OLS solution is: + * (X^T X)^-1 X^Ty + */ + public static CountOfCountsEstimator regress(double[] counts, double[] countsOfCounts) { + if (!(counts.length == countsOfCounts.length)) { + throw new RuntimeException("Dimensions of counts and countsOfCounts must match."); + } + + final int numDataPoints = counts.length; + double[] x1 = new double[numDataPoints]; + double[] x2 = new double[numDataPoints]; + double[] y = new double[numDataPoints]; + for (int i = 0; i < numDataPoints; i++) { + double sqrt_of_weight = sqrt(countsOfCounts[i]); + x1[i] = sqrt_of_weight * 1.0; // bias (for intercept) + x2[i] = sqrt_of_weight * log(counts[i]); // feature + y[i] = sqrt_of_weight * log(countsOfCounts[i]); // target + } + + //X^T X + double xs00 = 0; + double xs01 = 0; //symmetric matrix: xs01 == xs10 + double xs11 = 0; + for (int j = 0; j < x1.length; j++) { + xs00 += x1[j] * x1[j]; + xs01 += x1[j] * x2[j]; + xs11 += x2[j] * x2[j]; + } + + // matrix inverse to get (X^T X)^-1 + double denom = xs00 * xs11 - xs01 * xs01; + double xs00_inv = xs11 / denom; + double xs01_inv = -xs01 / denom; + double xs11_inv = xs00 / denom; + + //X^T y + double xty0 = 0.; + double xty1 = 0.; + for (int j = 0; j < x1.length; j++) { + xty0 += x1[j] * y[j]; + xty1 += x2[j] *y [j]; + } + + //bringing everything together: [intercept slope]^T = (X^T X)^-1 X^T y + double intercept = xs00_inv * xty0 + xty1 * xs01_inv; + double slope = xs01_inv * xty0 + xty1 * xs11_inv; + + return new CountOfCountsEstimator(slope, intercept); + } + +} \ No newline at end of file diff --git a/src/main/java/edu/jhu/thrax/hadoop/features/mapred/coc/GoodTuringSmoother.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/coc/GoodTuringSmoother.java new file mode 100644 index 0000000..9e66936 --- /dev/null +++ 
b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/coc/GoodTuringSmoother.java @@ -0,0 +1,15 @@ +// Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. +package edu.jhu.thrax.hadoop.features.mapred.coc; + +public class GoodTuringSmoother { + private CountOfCountsEstimator estimator; + + public GoodTuringSmoother(CountOfCountsEstimator estimator) { + this.estimator = estimator; + } + + public double smoothedCount(int count) { + double turingFraction = estimator.getEstimatedCountOfCount(count + 1) / estimator.getEstimatedCountOfCount(count); + return (count + 1) * turingFraction; + } +} \ No newline at end of file diff --git a/src/main/java/edu/jhu/thrax/hadoop/features/mapred/coc/package-info.java b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/coc/package-info.java new file mode 100644 index 0000000..a8dba2a --- /dev/null +++ b/src/main/java/edu/jhu/thrax/hadoop/features/mapred/coc/package-info.java @@ -0,0 +1,8 @@ +/** + * + */ +/** + * @author lewismc + * + */ +package edu.jhu.thrax.hadoop.features.mapred.coc; \ No newline at end of file diff --git a/src/main/java/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java index b5e971e..f82dcea 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java +++ b/src/main/java/edu/jhu/thrax/hadoop/jobs/FeatureCollectionJob.java @@ -12,6 +12,8 @@ import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.hadoop.datatypes.FeatureMap; import edu.jhu.thrax.hadoop.datatypes.FeaturePair; @@ -19,6 +21,8 @@ import edu.jhu.thrax.hadoop.paraphrasing.FeatureCollectionReducer; public class FeatureCollectionJob implements ThraxJob { + + private static final Logger LOG = 
LoggerFactory.getLogger(FeatureCollectionJob.class); private static HashSet> prereqs = new HashSet>(); @@ -32,7 +36,7 @@ public static void addPrerequisite(Class c) { prereq = c.newInstance(); prereq_names.add(prereq.getOutputSuffix()); } catch (Exception e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } diff --git a/src/main/java/edu/jhu/thrax/hadoop/jobs/Scheduler.java b/src/main/java/edu/jhu/thrax/hadoop/jobs/Scheduler.java index e186a61..0a09e7c 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/jobs/Scheduler.java +++ b/src/main/java/edu/jhu/thrax/hadoop/jobs/Scheduler.java @@ -5,10 +5,13 @@ import java.util.Set; import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.util.FormatUtils; public class Scheduler { + private static final Logger LOG = LoggerFactory.getLogger(Scheduler.class); private HashSet faked; private HashMap, JobState> jobs; @@ -30,14 +33,13 @@ public boolean schedule(Class jobClass) throws SchedulerExce try { job = jobClass.newInstance(); } catch (Exception e) { - e.printStackTrace(); throw new SchedulerException(e.getMessage()); } for (Class c : job.getPrerequisites()) { schedule(c); } jobs.put(jobClass, JobState.PLANNED); - System.err.println("[SCHED] planned job for " + jobClass); + LOG.error("[SCHED] planned job for {}", jobClass); return true; } @@ -45,7 +47,7 @@ public boolean setState(Class job_class, JobState state) throws SchedulerException { if (jobs.containsKey(job_class)) { jobs.put(job_class, state); - System.err.println(String.format("[SCHED] %s in state %s", job_class, state)); + LOG.error("[SCHED] {} in state {}", job_class, state); updateAllStates(); return true; } diff --git a/src/main/java/edu/jhu/thrax/hadoop/paraphrasing/PivotingReducer.java b/src/main/java/edu/jhu/thrax/hadoop/paraphrasing/PivotingReducer.java index cfeb5a0..4ec6c74 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/paraphrasing/PivotingReducer.java +++ 
b/src/main/java/edu/jhu/thrax/hadoop/paraphrasing/PivotingReducer.java @@ -12,6 +12,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.mapreduce.Reducer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.hadoop.datatypes.Annotation; import edu.jhu.thrax.hadoop.datatypes.FeatureMap; @@ -27,6 +29,8 @@ public class PivotingReducer extends Reducer { + private static final Logger LOG = LoggerFactory.getLogger(PivotingReducer.class); + private static enum PivotingCounters { F_READ, EF_READ, EF_PRUNED, EE_PRUNED, EE_WRITTEN } @@ -146,7 +150,7 @@ protected void pivotOne(ParaphrasePattern src, ParaphrasePattern tgt, Context co StringBuilder tgt_f = new StringBuilder(); for (int w : tgt.features.keySet()) tgt_f.append(Vocabulary.word(w) + "=" + tgt.features.get(w) + " "); - e.printStackTrace(); + LOG.error(e.getMessage(), e); throw new RuntimeException(Vocabulary.getWords(src.rhs) + " \n " + Vocabulary.getWords(tgt.rhs) + " \n " + src_f.toString() + " \n " + tgt_f.toString() + " \n"); diff --git a/src/main/java/edu/jhu/thrax/hadoop/tools/ExtractionTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/ExtractionTool.java index baefe8f..c7db026 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/tools/ExtractionTool.java +++ b/src/main/java/edu/jhu/thrax/hadoop/tools/ExtractionTool.java @@ -13,17 +13,21 @@ import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.hadoop.datatypes.RuleWritable; import edu.jhu.thrax.hadoop.extraction.ExtractionMapper; import edu.jhu.thrax.util.ConfFileParser; -public class ExtractionTool extends Configured implements Tool -{ +public class ExtractionTool extends Configured implements Tool { + + private static final Logger LOG = LoggerFactory.getLogger(ExtractionTool.class); + public int run(String [] 
argv) throws Exception { if (argv.length < 1) { - System.err.println("USAGE: ExtractionTool "); + LOG.error("USAGE: ExtractionTool "); return 1; } String thraxConf = argv[0]; @@ -35,12 +39,12 @@ public int run(String [] argv) throws Exception } String inputPath = conf.get("thrax.input-file"); if (inputPath == null) { - System.err.println("Set input-file key in conf file " + thraxConf + "!"); + LOG.error("Set input-file key in conf file " + thraxConf + "!"); return 1; } String workDir = conf.get("thrax.work-dir"); if (workDir == null) { - System.err.println("Set work-dir key in conf file " + thraxConf + "!"); + LOG.error("Set work-dir key in conf file " + thraxConf + "!"); return 1; } diff --git a/src/main/java/edu/jhu/thrax/hadoop/tools/FeatureTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/FeatureTool.java index 07d0ec0..2c483ea 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/tools/FeatureTool.java +++ b/src/main/java/edu/jhu/thrax/hadoop/tools/FeatureTool.java @@ -1,5 +1,6 @@ package edu.jhu.thrax.hadoop.tools; +import java.util.Locale; import java.util.Map; import org.apache.hadoop.conf.Configuration; @@ -13,25 +14,29 @@ import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.hadoop.datatypes.RuleWritable; import edu.jhu.thrax.hadoop.features.mapred.MapReduceFeature; import edu.jhu.thrax.hadoop.features.mapred.MapReduceFeatureFactory; import edu.jhu.thrax.util.ConfFileParser; -public class FeatureTool extends Configured implements Tool -{ +public class FeatureTool extends Configured implements Tool { + + private static final Logger LOG = LoggerFactory.getLogger(FeatureTool.class); + public int run(String [] argv) throws Exception { if (argv.length < 2) { - System.err.println("usage: FeatureTool "); + LOG.error("usage: FeatureTool "); return 1; } String confFile = argv[0]; String 
featureName = argv[1]; MapReduceFeature f = MapReduceFeatureFactory.get(featureName); if (!(f instanceof MapReduceFeature)) { - System.err.println("Not a MapReduceFeature: " + featureName); + LOG.error("Not a MapReduceFeature: {}", featureName); return 1; } Configuration conf = getConf(); @@ -41,14 +46,14 @@ public int run(String [] argv) throws Exception } String workDir = conf.get("thrax.work-dir"); if (workDir == null) { - System.err.println("set work-dir key in conf file " + confFile + "!"); + LOG.error("set work-dir key in conf file {}!", confFile); return 1; } if (!workDir.endsWith(Path.SEPARATOR)) { workDir += Path.SEPARATOR; conf.set("thrax.work-dir", workDir); } - Job job = Job.getInstance(conf, String.format("thrax-%s", featureName)); + Job job = Job.getInstance(conf, String.format(Locale.ROOT, "thrax-%s", featureName)); job.setJarByClass(f.getClass()); job.setMapperClass(f.mapperClass()); diff --git a/src/main/java/edu/jhu/thrax/hadoop/tools/OutputTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/OutputTool.java index c357e54..7e17fcc 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/tools/OutputTool.java +++ b/src/main/java/edu/jhu/thrax/hadoop/tools/OutputTool.java @@ -13,6 +13,8 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.hadoop.datatypes.RuleWritable; import edu.jhu.thrax.hadoop.features.mapred.MapReduceFeature; @@ -22,12 +24,14 @@ import edu.jhu.thrax.util.ConfFileParser; import edu.jhu.thrax.util.FormatUtils; -public class OutputTool extends Configured implements Tool -{ +public class OutputTool extends Configured implements Tool { + + private static final Logger LOG = LoggerFactory.getLogger(OutputTool.class); + public int run(String [] argv) throws Exception { if (argv.length < 1) { - System.err.println("usage: OutputTool "); + LOG.error("usage: OutputTool "); return 
1; } String confFile = argv[0]; @@ -38,7 +42,7 @@ public int run(String [] argv) throws Exception } String workDir = conf.get("thrax.work-dir"); if (workDir == null) { - System.err.println("Set work-dir key in conf file " + confFile + "!"); + LOG.error("Set work-dir key in conf file " + confFile + "!"); return 1; } if (!workDir.endsWith(Path.SEPARATOR)) { diff --git a/src/main/java/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java index 24217cf..d0756f5 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java +++ b/src/main/java/edu/jhu/thrax/hadoop/tools/SourceWordGivenTargetWordProbabilityTool.java @@ -14,18 +14,21 @@ import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.hadoop.datatypes.TextPair; import edu.jhu.thrax.hadoop.features.WordLexicalProbabilityCalculator; import edu.jhu.thrax.hadoop.jobs.WordLexprobJob; import edu.jhu.thrax.util.ConfFileParser; -public class SourceWordGivenTargetWordProbabilityTool extends Configured implements Tool -{ +public class SourceWordGivenTargetWordProbabilityTool extends Configured implements Tool { + + private static final Logger LOG = LoggerFactory.getLogger(SourceWordGivenTargetWordProbabilityTool.class); public int run(String [] argv) throws Exception { if (argv.length < 1) { - System.err.println("usage: SourceWordGivenTargetWordProbabilityTool "); + LOG.error("usage: SourceWordGivenTargetWordProbabilityTool "); return 1; } String confFile = argv[0]; @@ -36,19 +39,19 @@ public int run(String [] argv) throws Exception } String input = conf.get("thrax.input-file"); if (input == null) { - System.err.println("set input-file key in conf file " + confFile + "!"); + LOG.error("set input-file key in conf file " + 
confFile + "!"); return 1; } String workDir = conf.get("thrax.work-dir"); if (workDir == null) { - System.err.println("set work-dir key in conf file " + confFile + "!"); + LOG.error("set work-dir key in conf file " + confFile + "!"); return 1; } if (!workDir.endsWith(Path.SEPARATOR)) { workDir += Path.SEPARATOR; conf.set("thrax.work-dir", workDir); } - conf.setBoolean(WordLexprobJob.SOURCE_GIVEN_TARGET, true); + conf.setBoolean(WordLexprobJob.SOURCE_GIVEN_TARGET, true); Job job = Job.getInstance(conf, "thrax-sgt-word-lexprob"); job.setJarByClass(WordLexicalProbabilityCalculator.class); diff --git a/src/main/java/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java b/src/main/java/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java index e3215bf..91050b6 100644 --- a/src/main/java/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java +++ b/src/main/java/edu/jhu/thrax/hadoop/tools/TargetWordGivenSourceWordProbabilityTool.java @@ -14,18 +14,22 @@ import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.hadoop.datatypes.TextPair; import edu.jhu.thrax.hadoop.features.WordLexicalProbabilityCalculator; import edu.jhu.thrax.hadoop.jobs.WordLexprobJob; import edu.jhu.thrax.util.ConfFileParser; -public class TargetWordGivenSourceWordProbabilityTool extends Configured implements Tool -{ +public class TargetWordGivenSourceWordProbabilityTool extends Configured implements Tool { + + private static final Logger LOG = LoggerFactory.getLogger(TargetWordGivenSourceWordProbabilityTool.class); + public int run(String [] argv) throws Exception { if (argv.length < 1) { - System.err.println("usage: TargetWordGivenSourceWordProbabilityTool "); + LOG.error("usage: TargetWordGivenSourceWordProbabilityTool "); return 1; } String confFile = argv[0]; @@ -36,19 +40,19 @@ 
public int run(String [] argv) throws Exception } String input = conf.get("thrax.input-file"); if (input == null) { - System.err.println("set input-file key in conf file " + confFile + "!"); + LOG.error("set input-file key in conf file {}!", confFile); return 1; } String workDir = conf.get("thrax.work-dir"); if (workDir == null) { - System.err.println("set work-dir key in conf file " + confFile + "!"); + LOG.error("set work-dir key in conf file {}!", confFile); return 1; } if (!workDir.endsWith(Path.SEPARATOR)) { workDir += Path.SEPARATOR; conf.set("thrax.work-dir", workDir); } - conf.setBoolean(WordLexprobJob.SOURCE_GIVEN_TARGET, false); + conf.setBoolean(WordLexprobJob.SOURCE_GIVEN_TARGET, false); Job job = Job.getInstance(conf, "thrax-tgs-word-lexprob"); job.setJarByClass(WordLexicalProbabilityCalculator.class); diff --git a/src/main/java/edu/jhu/thrax/lexprob/HashMapLexprobTable.java b/src/main/java/edu/jhu/thrax/lexprob/HashMapLexprobTable.java index 9a0b2be..aee64d7 100644 --- a/src/main/java/edu/jhu/thrax/lexprob/HashMapLexprobTable.java +++ b/src/main/java/edu/jhu/thrax/lexprob/HashMapLexprobTable.java @@ -4,8 +4,11 @@ import java.util.HashMap; import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class HashMapLexprobTable extends SequenceFileLexprobTable { + private static final Logger LOG = LoggerFactory.getLogger(HashMapLexprobTable.class); private HashMap table; public HashMapLexprobTable(Configuration conf, String fileGlob) throws IOException { @@ -18,7 +21,7 @@ public void initialize(Iterable entries) { table = new HashMap(); for (TableEntry te : entries) { table.put((((long) te.car << 32) | te.cdr), te.probability); - if (table.size() % 1000 == 0) System.err.printf("[%d]\n", table.size()); + if (table.size() % 1000 == 0) LOG.error("[{}]\n", table.size()); } } diff --git a/src/main/java/edu/jhu/thrax/lexprob/LexprobTest.java b/src/main/java/edu/jhu/thrax/lexprob/LexprobTest.java index 
0336aea..15738b2 100644 --- a/src/main/java/edu/jhu/thrax/lexprob/LexprobTest.java +++ b/src/main/java/edu/jhu/thrax/lexprob/LexprobTest.java @@ -4,19 +4,24 @@ import org.apache.hadoop.conf.Configured; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class LexprobTest extends Configured implements Tool { + + private static final Logger LOG = LoggerFactory.getLogger(LexprobTest.class); + public int run(String[] argv) throws Exception { if (argv.length < 1) { - System.err.println("usage: LexprobTest "); + LOG.error("usage: LexprobTest "); return 1; } Configuration conf = getConf(); HashMapLexprobTable t = new HashMapLexprobTable(conf, argv[0]); - System.err.println("HashMap populated: " + t.toString()); + LOG.info("HashMap populated: {}", t.toString()); TrieLexprobTable trie = new TrieLexprobTable(conf, argv[0]); - System.err.println("Trie populated: " + trie.toString()); + LOG.info("Trie populated: {}", trie.toString()); return 0; } diff --git a/src/main/java/edu/jhu/thrax/lexprob/TableEntry.java b/src/main/java/edu/jhu/thrax/lexprob/TableEntry.java index ac93cbf..0a6c7a1 100644 --- a/src/main/java/edu/jhu/thrax/lexprob/TableEntry.java +++ b/src/main/java/edu/jhu/thrax/lexprob/TableEntry.java @@ -1,5 +1,7 @@ package edu.jhu.thrax.lexprob; +import java.util.Locale; + import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.LongWritable; @@ -19,7 +21,7 @@ public TableEntry(LongWritable pair, FloatWritable d) { } public String toString() { - return String.format("(%s,%s):%.4f", car, cdr, probability); + return String.format(Locale.ROOT, "(%s,%s):%.4f", car, cdr, probability); } public boolean equals(Object o) { diff --git a/src/main/java/edu/jhu/thrax/lexprob/TrieLexprobTable.java b/src/main/java/edu/jhu/thrax/lexprob/TrieLexprobTable.java index 861849a..4f904d8 100644 --- a/src/main/java/edu/jhu/thrax/lexprob/TrieLexprobTable.java +++ 
b/src/main/java/edu/jhu/thrax/lexprob/TrieLexprobTable.java @@ -4,13 +4,17 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Locale; import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class TrieLexprobTable extends SequenceFileLexprobTable { private int[] cars; private int[][] cdrs; private float[][] values; + private static final Logger LOG = LoggerFactory.getLogger(TrieLexprobTable.class); public TrieLexprobTable(Configuration conf, String fileGlob) throws IOException { super(conf, fileGlob); @@ -51,7 +55,7 @@ protected void initialize(Iterable entries) { cdrList.clear(); valueList.clear(); i++; - if (i % 1000 == 0) System.err.printf("[%d]\n", i); + if (i % 1000 == 0) LOG.error("[{}]\n", i); cars[i] = te.car; car = cars[i]; } @@ -66,13 +70,13 @@ protected void initialize(Iterable entries) { private void checkIntegrity() { for (int i = 0; i < cars.length-1; i++) { if (cars[i] > cars[i+1]) { - final String msg = String.format("Failed loading TrieLexprobTable. Entries must be sorted ascendingly, but cars[%d]=%d > cars[%d]=%d", + final String msg = String.format(Locale.ROOT, "Failed loading TrieLexprobTable. Entries must be sorted ascendingly, but cars[%d]=%d > cars[%d]=%d", i, cars[i], i+1, cars[i+1]); throw new RuntimeException(msg); } for (int j = 0; j < cdrs[i].length-1; j++) { if (cdrs[i][j] > cdrs[i][j+1]) { - final String msg = String.format("Failed loading TrieLexprobTable. Entries must be sorted ascendingly, but cdrs[%d][%d]=%d > cdrs[%d][%d]=%d", + final String msg = String.format(Locale.ROOT, "Failed loading TrieLexprobTable. 
Entries must be sorted ascendingly, but cdrs[%d][%d]=%d > cdrs[%d][%d]=%d", i, j, cdrs[i][j], i+1, j+1, cdrs[i][j+1]); throw new RuntimeException(msg); } diff --git a/src/main/java/edu/jhu/thrax/syntax/LatticeArray.java b/src/main/java/edu/jhu/thrax/syntax/LatticeArray.java index 000ebeb..ad9b93d 100644 --- a/src/main/java/edu/jhu/thrax/syntax/LatticeArray.java +++ b/src/main/java/edu/jhu/thrax/syntax/LatticeArray.java @@ -9,16 +9,22 @@ import java.util.EmptyStackException; import java.util.HashMap; import java.util.HashSet; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.Stack; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import edu.jhu.thrax.util.ExternalizableToUtf8; import edu.jhu.thrax.util.Vocabulary; import edu.jhu.thrax.util.exceptions.MalformedParseException; import edu.jhu.thrax.util.io.LineReader; public class LatticeArray implements ParseLattice, Externalizable, ExternalizableToUtf8 { + + private static final Logger LOG = LoggerFactory.getLogger(LatticeArray.class); /** * A random number to get rid of the warning. @@ -142,7 +148,7 @@ public Collection getConstituentLabels(int from, int to) { public int getOneConstituent(int from, int to) { int spanLength = to - from; - Stack stack = new Stack(); + Stack stack = new Stack<>(); for (int i = forwardIndex.get(from); i < forwardIndex.get(from + 1); i += 2) { int currentSpan = forwardLattice.get(i + 1); @@ -443,7 +449,7 @@ private void appendFromPennFormat(String line) throws MalformedParseException { forwardLattice.add(forwardIndex.size()); if (pos) pre_terminals.add(current_id); } else { - current_id = Vocabulary.id((lowercase ? token.toLowerCase() : token)); + current_id = Vocabulary.id((lowercase ? 
token.toLowerCase(Locale.ROOT) : token)); terminals.add(current_id); forwardIndex.add(forwardLattice.size()); @@ -461,7 +467,6 @@ public static void main(String[] args) { try { la.readExternalUtf8(args[0]); - // System.err.println(la); int from = Integer.parseInt(args[1]); int to = Integer.parseInt(args[2]); @@ -469,18 +474,18 @@ public static void main(String[] args) { Collection labels; labels = la.getConstituentLabels(from, to); for (int l : labels) - System.err.println(Vocabulary.word(l)); + LOG.info(Vocabulary.word(l)); labels = la.getConcatenatedLabels(from, to); for (int l : labels) - System.err.println(Vocabulary.word(l)); + LOG.info(Vocabulary.word(l)); labels = la.getCcgLabels(from, to); for (int l : labels) - System.err.println(Vocabulary.word(l)); + LOG.info(Vocabulary.word(l)); } } catch (IOException e) { - e.printStackTrace(); + LOG.error(e.getMessage(), e); } } diff --git a/src/main/java/edu/jhu/thrax/syntax/ParseTree.java b/src/main/java/edu/jhu/thrax/syntax/ParseTree.java index a5e60e6..90528ef 100644 --- a/src/main/java/edu/jhu/thrax/syntax/ParseTree.java +++ b/src/main/java/edu/jhu/thrax/syntax/ParseTree.java @@ -7,9 +7,13 @@ import java.util.EmptyStackException; import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.Scanner; import java.util.Stack; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import edu.jhu.thrax.util.Vocabulary; public class ParseTree { @@ -17,6 +21,7 @@ public class ParseTree { private final int[] numChildren; private final int[] start; private final int[] end; + private static final Logger LOG = LoggerFactory.getLogger(ParseTree.class); private ParseTree(int[] ls, int[] cs, int[] ss, int[] es) { labels = ls; @@ -199,7 +204,7 @@ public Iterator children() { public String toString() { if (isLeaf()) return Vocabulary.word(label()); - String result = String.format("(%s", label()); + String result = String.format(Locale.ROOT, "(%s", label()); Iterator children = children(); while 
(children.hasNext()) result += " " + children.next().toString(); @@ -250,10 +255,10 @@ private int nextSiblingIndex(int i) { } public static void main(String[] argv) throws IOException { - Scanner scanner = new Scanner(System.in); + Scanner scanner = new Scanner(System.in, "UTF-8"); while (scanner.hasNextLine()) { ParseTree tree = ParseTree.fromPennFormat(scanner.nextLine()); - System.out.printf("%s\t%d\n", tree, tree.hashCode()); + LOG.info("{}\t{}\n", tree, tree.hashCode()); } scanner.close(); } diff --git a/src/main/java/edu/jhu/thrax/tools/ExtractPropbankRules.java b/src/main/java/edu/jhu/thrax/tools/ExtractPropbankRules.java index 2bb5965..f3641c9 100644 --- a/src/main/java/edu/jhu/thrax/tools/ExtractPropbankRules.java +++ b/src/main/java/edu/jhu/thrax/tools/ExtractPropbankRules.java @@ -6,6 +6,7 @@ import java.util.Collection; import java.util.HashSet; import java.util.LinkedList; +import java.util.Locale; import java.util.Queue; import java.util.logging.Logger; @@ -96,7 +97,7 @@ public static void main(String[] args) { if (rel.suffix(rel_spans.get(i))) rel.t = rel_spans.get(i).t; // Build predicate-only entries. 
- String rel_string = parse.getTerminalPhrase(rel.f, rel.t).toLowerCase(); + String rel_string = parse.getTerminalPhrase(rel.f, rel.t).toLowerCase(Locale.ROOT); if (continuous) { for (String label : getLabels(parse, rel)) @@ -131,7 +132,7 @@ public static void main(String[] args) { i++; } else { for (StringBuilder b : surfaces) - b.append(parse.getTerminal(p).toLowerCase() + " "); + b.append(parse.getTerminal(p).toLowerCase(Locale.ROOT) + " "); p++; } } diff --git a/src/main/java/edu/jhu/thrax/tools/JudgeParaphrases.java b/src/main/java/edu/jhu/thrax/tools/JudgeParaphrases.java index e71eb34..c62ce67 100644 --- a/src/main/java/edu/jhu/thrax/tools/JudgeParaphrases.java +++ b/src/main/java/edu/jhu/thrax/tools/JudgeParaphrases.java @@ -2,15 +2,18 @@ import java.io.BufferedWriter; import java.io.IOException; +import java.util.Locale; import java.util.Scanner; -import java.util.logging.Logger; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.jerboa.util.FileManager; import edu.jhu.thrax.util.io.LineReader; public class JudgeParaphrases { - - private static final Logger logger = Logger.getLogger(JudgeParaphrases.class.getName()); + + private static final Logger LOG = LoggerFactory.getLogger(JudgeParaphrases.class); public static void main(String[] args) { @@ -26,11 +29,11 @@ public static void main(String[] args) { } if (input == null) { - logger.severe("No input file specified."); + LOG.error("No input file specified."); return; } if (output == null) { - logger.severe("No output file specified."); + LOG.error("No output file specified."); return; } @@ -40,19 +43,19 @@ public static void main(String[] args) { try { reader = new LineReader(input); writer = FileManager.getWriter(output); - user = new Scanner(System.in); + user = new Scanner(System.in, "UTF-8"); while (reader.hasNext()) { String pp = reader.next().trim(); - System.out.print(pp + "\t"); + LOG.info("{}\t", pp); String score = user.next().trim(); - if 
(score.toLowerCase().equals("quit") || score.toLowerCase().equals("exit")) + if (score.toLowerCase(Locale.ROOT).equals("quit") || score.toLowerCase(Locale.ROOT).equals("exit")) break; writer.write(score + "\t" + pp + "\n"); } reader.close(); writer.close(); } catch (IOException e) { - logger.severe(e.getMessage()); + LOG.error(e.getMessage()); } } diff --git a/src/main/java/edu/jhu/thrax/tools/ParaphraseCoverage.java b/src/main/java/edu/jhu/thrax/tools/ParaphraseCoverage.java index f9f4d0b..5c6b081 100644 --- a/src/main/java/edu/jhu/thrax/tools/ParaphraseCoverage.java +++ b/src/main/java/edu/jhu/thrax/tools/ParaphraseCoverage.java @@ -8,7 +8,9 @@ import java.util.List; import java.util.PriorityQueue; import java.util.Random; -import java.util.logging.Logger; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.jerboa.util.FileManager; import edu.jhu.thrax.util.FormatUtils; @@ -16,7 +18,7 @@ public class ParaphraseCoverage { - private static final Logger logger = Logger.getLogger(ParaphraseCoverage.class.getName()); + private static final Logger LOG = LoggerFactory.getLogger(ParaphraseCoverage.class); public static void main(String[] args) { @@ -52,23 +54,23 @@ public static void main(String[] args) { } if (grammar_file == null) { - logger.severe("No grammar specified."); + LOG.error("No grammar specified."); return; } if (reference_file == null) { - logger.severe("No reference file specified."); + LOG.error("No reference file specified."); return; } if (weight_file == null) { - logger.severe("No weight file specified."); + LOG.error("No weight file specified."); return; } if (output_file == null) { - logger.severe("No output file specified."); + LOG.error("No output file specified."); return; } if (judgment_prefix != null && sampling_points == null) { - logger.severe("Need sampling points if judgment dump is requested."); + LOG.error("Need sampling points if judgment dump is requested."); return; } @@ -140,7 +142,7 @@ public static void 
main(String[] args) { if (relevant_file != null) rel_writer = FileManager.getWriter(relevant_file); LineReader reader = new LineReader(grammar_file); - System.err.print("["); + LOG.error("["); int rule_count = 0; while (reader.hasNext()) { String rule_line = reader.next().trim(); @@ -159,11 +161,11 @@ public static void main(String[] args) { score += weights.get(parts[0]) * Double.parseDouble(parts[1]); } - if (++rule_count % 10000 == 0) System.err.print("-"); + if (++rule_count % 10000 == 0) LOG.error("-"); paraphrases.add(new ScoredParaphrase(candidate_phrase, fields[2], score)); } - System.err.println("]"); + LOG.error("]"); reader.close(); if (rel_writer != null) rel_writer.close(); @@ -188,9 +190,9 @@ public static void main(String[] args) { } } - System.err.println("Items: " + num_items); - System.err.println("Covered: " + num_covered); - System.err.println("Paraphrases: " + num_paraphrases); + LOG.info("Items: {}", num_items); + LOG.info("Covered: {}", num_covered); + LOG.info("Paraphrases: {}", num_paraphrases); boolean judge = (judgment_prefix != null); BufferedWriter cand_writer = null; @@ -229,7 +231,7 @@ public static void main(String[] args) { // Sample paraphrases for judgements. 
if (judge && bin_id < bins.length && last_score < bins[bin_id] && sp.score >= bins[bin_id]) { bin_id++; - logger.info("Sampling bin " + bin_id + " at " + bins[bin_id - 1]); + LOG.info("Sampling bin {} at {}", bin_id, bins[bin_id - 1]); Object[] pps = paraphrases.toArray(); for (int i = 0; i < 200; i++) { @@ -250,7 +252,7 @@ public static void main(String[] args) { if (judge) cand_writer.close(); score_writer.close(); } catch (IOException e) { - logger.severe(e.getMessage()); + LOG.error(e.getMessage()); } } } diff --git a/src/main/java/edu/jhu/thrax/tools/ParaphraseIntersect.java b/src/main/java/edu/jhu/thrax/tools/ParaphraseIntersect.java index 5b5e207..eb08ac8 100644 --- a/src/main/java/edu/jhu/thrax/tools/ParaphraseIntersect.java +++ b/src/main/java/edu/jhu/thrax/tools/ParaphraseIntersect.java @@ -7,17 +7,18 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Locale; -import java.util.logging.Logger; import java.util.regex.Pattern; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import edu.jhu.jerboa.util.FileManager; import edu.jhu.thrax.util.FormatUtils; import edu.jhu.thrax.util.io.LineReader; public class ParaphraseIntersect { - private static final Logger logger = Logger.getLogger(ParaphraseIntersect.class.getName()); - + private static final Logger LOG = LoggerFactory.getLogger(ParaphraseIntersect.class); private static final Pattern P_SPACE = Pattern.compile("\\s+"); private static final Pattern P_EQUAL = Pattern.compile("="); @@ -49,19 +50,19 @@ public static void main(String[] args) { } if (grammar_file == null) { - logger.severe("No grammar specified."); + LOG.error("No grammar specified."); return; } if (reference_file == null) { - logger.severe("No reference file specified."); + LOG.error("No reference file specified."); return; } if (weight_file == null) { - logger.severe("No weight file specified."); + LOG.error("No weight file specified."); return; } if (output_file == null) { - logger.severe("No output file 
specified."); + LOG.error("No output file specified."); return; } @@ -89,7 +90,7 @@ public static void main(String[] args) { // TODO: fix sorting to comply with UNIX sort. Likely: LC_COLLATE=C and String.compareTo() Collator comp = Collator.getInstance(Locale.US); - System.err.print("["); + LOG.error("["); int num_references = 0; while (gread.hasNext()) { String rule_line = gread.next().trim(); @@ -109,8 +110,6 @@ public static void main(String[] args) { } } -// System.err.println("Checking: " + rule); - while (rread.hasNext() && (rline == null || comp.compare(rule, rline) > 0)) { String line = rread.next().trim(); String[] rfs = FormatUtils.P_DELIM.split(line); @@ -118,21 +117,18 @@ public static void main(String[] args) { int count = (int) Math.round(1 - Math.log(rarity)); if (count >= threshold && !line.contains("[X") && (identity || !rfs[1].equals(rfs[2]))) { rline = rfs[0] + " ||| " + rfs[1] + " ||| " + rfs[2]; -// System.err.println("Test: " + rline); num_references++; } } -// System.err.println("Order broken."); if (comp.compare(rule, rline) == 0) { -// System.err.println("MATCH: " + rline); found.add(score); } else { missed.add(score); } } gread.close(); - System.err.println("]"); + LOG.error("]"); while (rread.hasNext()) { rread.next(); @@ -155,10 +151,10 @@ public static void main(String[] args) { int num_correct = matched.length; int num_paraphrases = matched.length + unmatched.length; - System.err.println("References: " + num_references); - System.err.println("Matched: " + num_correct); - System.err.println("Unmatched: " + (num_references - num_correct)); - System.err.println("Nonmatching: " + unmatched.length); + LOG.info("References: {}", num_references); + LOG.info("Matched: {}", num_correct); + LOG.info("Unmatched: {}", (num_references - num_correct)); + LOG.info("Nonmatching: {}", unmatched.length); Arrays.sort(matched); Arrays.sort(unmatched); @@ -180,7 +176,7 @@ public static void main(String[] args) { } score_writer.close(); } catch (IOException 
e) { - logger.severe(e.getMessage()); + LOG.error(e.getMessage()); } } } diff --git a/src/main/java/edu/jhu/thrax/tools/ParaphraseOverlap.java b/src/main/java/edu/jhu/thrax/tools/ParaphraseOverlap.java index 74eed19..a4ffb32 100644 --- a/src/main/java/edu/jhu/thrax/tools/ParaphraseOverlap.java +++ b/src/main/java/edu/jhu/thrax/tools/ParaphraseOverlap.java @@ -5,7 +5,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.logging.Logger; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.jerboa.util.FileManager; import edu.jhu.thrax.util.FormatUtils; @@ -13,7 +15,7 @@ public class ParaphraseOverlap { - private static final Logger logger = Logger.getLogger(ParaphraseOverlap.class.getName()); + private static final Logger LOG = LoggerFactory.getLogger(ParaphraseOverlap.class); public static void main(String[] args) { @@ -35,19 +37,19 @@ public static void main(String[] args) { } if (grammar_file == null) { - logger.severe("No grammar specified."); + LOG.error("No grammar specified."); return; } if (reference_file == null) { - logger.severe("No reference file specified."); + LOG.error("No reference file specified."); return; } if (weight_file == null) { - logger.severe("No weight file specified."); + LOG.error("No weight file specified."); return; } if (output_file == null) { - logger.severe("No output file specified."); + LOG.error("No output file specified."); return; } @@ -75,7 +77,7 @@ public static void main(String[] args) { ArrayList missed = new ArrayList(); LineReader reader = new LineReader(grammar_file); - System.err.print("["); + LOG.error("["); int rule_count = 0; while (reader.hasNext()) { String rule_line = reader.next().trim(); @@ -92,7 +94,7 @@ public static void main(String[] args) { } if (rule_to_score.containsKey(rule)) { - if (++rule_count % 10000 == 0) System.err.print("-"); + if (++rule_count % 10000 == 0) LOG.error("-"); if (rule_to_score.get(rule) == null) 
rule_to_score.put(rule, score); @@ -102,7 +104,7 @@ public static void main(String[] args) { missed.add(score); } } - System.err.println("]"); + LOG.error("]"); reader.close(); double[] matched = new double[rule_count]; @@ -120,10 +122,10 @@ public static void main(String[] args) { int num_correct = matched.length; int num_paraphrases = matched.length + unmatched.length; - System.err.println("References: " + num_references); - System.err.println("Matched: " + num_correct); - System.err.println("Unmatched: " + (num_references - num_correct)); - System.err.println("Nonmatching: " + unmatched.length); + LOG.info("References: {}", num_references); + LOG.info("Matched: {}", num_correct); + LOG.info("Unmatched: {}", (num_references - num_correct)); + LOG.info("Nonmatching: {}", unmatched.length); Arrays.sort(matched); Arrays.sort(unmatched); @@ -144,7 +146,7 @@ public static void main(String[] args) { } score_writer.close(); } catch (IOException e) { - logger.severe(e.getMessage()); + LOG.error(e.getMessage()); } } } diff --git a/src/main/java/edu/jhu/thrax/tools/ParaphraseScore.java b/src/main/java/edu/jhu/thrax/tools/ParaphraseScore.java index 17f1edf..e536e4e 100644 --- a/src/main/java/edu/jhu/thrax/tools/ParaphraseScore.java +++ b/src/main/java/edu/jhu/thrax/tools/ParaphraseScore.java @@ -5,7 +5,9 @@ import java.util.HashMap; import java.util.HashSet; import java.util.PriorityQueue; -import java.util.logging.Logger; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.jerboa.util.FileManager; import edu.jhu.thrax.util.FormatUtils; @@ -13,12 +15,11 @@ public class ParaphraseScore { - private static final Logger logger = Logger.getLogger(ParaphraseScore.class.getName()); - private static int unknown_source; private static int total; private static int correct; private static int found; + private static final Logger LOG = LoggerFactory.getLogger(ParaphraseScore.class); public static void main(String[] args) { @@ -43,19 +44,19 @@ public static void 
main(String[] args) { } if (grammar_file == null) { - logger.severe("No grammar specified."); + LOG.error("No grammar specified."); return; } if (reference_file == null) { - logger.severe("No reference file specified."); + LOG.error("No reference file specified."); return; } if (weight_file == null) { - logger.severe("No weight file specified."); + LOG.error("No weight file specified."); return; } if (output_file == null) { - logger.severe("No output file specified."); + LOG.error("No output file specified."); return; } @@ -92,7 +93,7 @@ public static void main(String[] args) { if (relevant_file != null) rel_writer = FileManager.getWriter(relevant_file); LineReader reader = new LineReader(grammar_file); - System.err.print("["); + LOG.info("["); int count = 0; while (reader.hasNext()) { String rule_line = reader.next().trim(); @@ -139,7 +140,7 @@ public static void main(String[] args) { score += weights.get(parts[0]) * Double.parseDouble(parts[1]); } - if (++count % 10000 == 0) System.err.print("-"); + if (++count % 10000 == 0) LOG.info("-"); String pair = source + " ||| " + target; @@ -150,7 +151,7 @@ public static void main(String[] args) { candidates.put(pair, Math.max(score, previous)); } } - System.err.println("]"); + LOG.info("]"); reader.close(); if (rel_writer != null) rel_writer.close(); @@ -171,10 +172,10 @@ public static void main(String[] args) { entries.add(new ScoredEntry(p, candidates.get(p))); } - System.err.println("Total: " + total); - System.err.println("Found: " + found); - System.err.println("Correct: " + correct); - System.err.println("Not matching: " + unknown_source); + LOG.info("Total: {}", total); + LOG.info("Found: {}", found); + LOG.info("Correct: {}", correct); + LOG.info("Not matching: {}", unknown_source); BufferedWriter score_writer = FileManager.getWriter(output_file); while (!entries.isEmpty()) { @@ -190,7 +191,7 @@ public static void main(String[] args) { score_writer.close(); } catch (IOException e) { - 
logger.severe(e.getMessage()); + LOG.error(e.getMessage()); } } } diff --git a/src/main/java/edu/jhu/thrax/tools/ParaphraseWordNet.java b/src/main/java/edu/jhu/thrax/tools/ParaphraseWordNet.java index b04435e..428f25f 100644 --- a/src/main/java/edu/jhu/thrax/tools/ParaphraseWordNet.java +++ b/src/main/java/edu/jhu/thrax/tools/ParaphraseWordNet.java @@ -5,15 +5,17 @@ import java.util.HashMap; import java.util.HashSet; import java.util.PriorityQueue; -import java.util.logging.Logger; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.jerboa.util.FileManager; import edu.jhu.thrax.util.FormatUtils; import edu.jhu.thrax.util.io.LineReader; public class ParaphraseWordNet { - - private static final Logger logger = Logger.getLogger(ParaphraseWordNet.class.getName()); + + private static final Logger LOG = LoggerFactory.getLogger(ParaphraseWordNet.class); public static void main(String[] args) { @@ -38,19 +40,19 @@ public static void main(String[] args) { } if (grammar_file == null) { - logger.severe("No grammar specified."); + LOG.error("No grammar specified."); return; } if (reference_file == null) { - logger.severe("No reference file specified."); + LOG.error("No reference file specified."); return; } if (weight_file == null) { - logger.severe("No weight file specified."); + LOG.error("No weight file specified."); return; } if (output_file == null) { - logger.severe("No output file specified."); + LOG.error("No output file specified."); return; } @@ -82,7 +84,7 @@ public static void main(String[] args) { if (relevant_file != null) rel_writer = FileManager.getWriter(relevant_file); LineReader reader = new LineReader(grammar_file); - System.err.print("["); + LOG.info("["); int count = 0; while (reader.hasNext()) { String rule_line = reader.next().trim(); @@ -114,7 +116,7 @@ public static void main(String[] args) { score += weights.get(parts[0]) * Double.parseDouble(parts[1]); } - if (++count % 10000 == 0) System.err.print("-"); + if (++count % 10000 
== 0) LOG.info("-"); String candidate = lhs + " ||| " + source + " ||| " + target; @@ -125,7 +127,7 @@ public static void main(String[] args) { candidates.put(candidate, Math.max(score, previous)); } } - System.err.println("]"); + LOG.info("]"); reader.close(); if (rel_writer != null) rel_writer.close(); @@ -140,9 +142,9 @@ public static void main(String[] args) { entries.add(new ScoredEntry(p, candidates.get(p))); } - System.err.println("References : " + num_references); - System.err.println("Paraphrases: " + num_paraphrases); - System.err.println("Correct: " + num_correct); + LOG.info("References : {}", num_references); + LOG.info("Paraphrases: {}", num_paraphrases); + LOG.info("Correct: {}", num_correct); BufferedWriter score_writer = FileManager.getWriter(output_file); while (!entries.isEmpty()) { @@ -156,7 +158,7 @@ public static void main(String[] args) { } score_writer.close(); } catch (IOException e) { - logger.severe(e.getMessage()); + LOG.error(e.getMessage()); } } } diff --git a/src/main/java/edu/jhu/thrax/tools/SequenceToGrammar.java b/src/main/java/edu/jhu/thrax/tools/SequenceToGrammar.java index b6b5e98..601fa34 100644 --- a/src/main/java/edu/jhu/thrax/tools/SequenceToGrammar.java +++ b/src/main/java/edu/jhu/thrax/tools/SequenceToGrammar.java @@ -1,32 +1,34 @@ package edu.jhu.thrax.tools; import java.io.BufferedWriter; -import java.util.logging.Logger; +import java.util.Locale; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.io.SequenceFile.Reader.Option; import edu.jhu.jerboa.util.FileManager; public class SequenceToGrammar { - private static final Logger logger = Logger.getLogger(SequenceToGrammar.class.getName()); + private static final Logger LOG = LoggerFactory.getLogger(SequenceToGrammar.class); private static void usage() { - System.err.println("Usage: java 
edu.jhu.thrax.tools.SequenceToGrammar"); - System.err.println("\t -i sequence_file \t Sequence file from Thrax grammar extraction."); - System.err.println("\t -o output_file \t Output grammar file name."); - System.err.println(); + String usage = "Usage: java edu.jhu.thrax.tools.SequenceToGrammar\n" + + "\t -i sequence_file \t Sequence file from Thrax grammar extraction.\n" + + "\t -o output_file \t Output grammar file name."; + LOG.error(usage); } public static void main(String[] args) throws Exception { String input_file = null; String output_file = null; - if (args.length < 4 || args[0].toLowerCase().equals("-h")) { + if (args.length < 4 || args[0].toLowerCase(Locale.ROOT).equals("-h")) { usage(); System.exit(0); } @@ -38,12 +40,12 @@ public static void main(String[] args) throws Exception { } } if (input_file == null) { - logger.severe("No input file specified."); + LOG.error("No input file specified."); usage(); System.exit(0); } if (output_file == null) { - logger.severe("No output file specified."); + LOG.error("No output file specified."); usage(); System.exit(0); } @@ -63,6 +65,6 @@ public static void main(String[] args) throws Exception { } reader.close(); grammar_writer.close(); - System.err.println("Merged " + rule_count + " rules."); + LOG.info("Merged {} rules.", rule_count); } } diff --git a/src/main/java/edu/jhu/thrax/tools/SequenceToSignatures.java b/src/main/java/edu/jhu/thrax/tools/SequenceToSignatures.java index 805b53a..f9c6d20 100644 --- a/src/main/java/edu/jhu/thrax/tools/SequenceToSignatures.java +++ b/src/main/java/edu/jhu/thrax/tools/SequenceToSignatures.java @@ -4,19 +4,21 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.ObjectOutputStream; -import java.util.logging.Logger; +import java.util.Locale; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.SequenceFile.Reader.Option; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; import edu.jhu.jerboa.util.FileManager; import edu.jhu.thrax.hadoop.distributional.SignatureWritable; public class SequenceToSignatures { - - private static final Logger logger = Logger.getLogger(SequenceToSignatures.class.getName()); + + private static final Logger LOG = LoggerFactory.getLogger(SequenceToSignatures.class); private static void writeConfig(String config_file, int num_bits) throws IOException { ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(config_file)); @@ -28,11 +30,11 @@ private static void writeConfig(String config_file, int num_bits) throws IOExcep } private static void usage() { - System.err.println("Usage: java edu.jhu.thrax.tools.SequenceToSignature"); - System.err.println("\t -i sequence_file \t Sequence file from Thrax signature extraction."); - System.err.println("\t -o prefix \t\t Prefix for signature files: .0001.keyz.gz."); - System.err.println("\t -c chunk_size \t\t Number of keys per signature chunk."); - System.err.println(); + String usage = "Usage: java edu.jhu.thrax.tools.SequenceToSignature\n" + + "\t -i sequence_file \t Sequence file from Thrax signature extraction.\n" + + "\t -o prefix \t\t Prefix for signature files: .0001.keyz.gz.\n" 
+ + "\t -c chunk_size \t\t Number of keys per signature chunk."; + LOG.error(usage); } public static void main(String[] args) throws Exception { @@ -41,7 +43,7 @@ public static void main(String[] args) throws Exception { int chunk_size = 500000; String output_prefix = null; - if (args.length < 4 || args[0].toLowerCase().equals("-h")) { + if (args.length < 4 || args[0].toLowerCase(Locale.ROOT).equals("-h")) { usage(); System.exit(0); } @@ -55,17 +57,17 @@ public static void main(String[] args) throws Exception { } } if (input_file == null) { - logger.severe("No input file specified."); + LOG.error("No input file specified."); usage(); System.exit(0); } if (output_prefix == null) { - logger.severe("No output prefix specified."); + LOG.error("No output prefix specified."); usage(); System.exit(0); } - logger.info("Looking for " + input_file + " on " + (local ? "local filesystem" : "HDFS") + "."); + LOG.info("Looking for {} on {}.", input_file, (local ? "local filesystem" : "HDFS")); Configuration config = new Configuration(); SignatureWritable signature = new SignatureWritable(); @@ -98,7 +100,7 @@ public static void main(String[] args) throws Exception { bytes_out.close(); strengths_writer.close(); } - String chunk_tag = String.format("-%05d", chunk_id); + String chunk_tag = String.format(Locale.ROOT, "-%05d", chunk_id); writeConfig(output_prefix + chunk_tag + ".config", signature.bytes.length * 8); bytes_out = new FileOutputStream(output_prefix + chunk_tag + ".bytes"); strengths_writer = FileManager.getWriter(output_prefix + chunk_tag + ".strengths.gz"); diff --git a/src/main/java/edu/jhu/thrax/tools/SplitAndFilter.java b/src/main/java/edu/jhu/thrax/tools/SplitAndFilter.java index 7b0befc..fa17a12 100644 --- a/src/main/java/edu/jhu/thrax/tools/SplitAndFilter.java +++ b/src/main/java/edu/jhu/thrax/tools/SplitAndFilter.java @@ -4,7 +4,9 @@ import java.io.IOException; import java.util.HashMap; import java.util.HashSet; -import java.util.logging.Logger; + +import 
org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.jerboa.util.FileManager; import edu.jhu.thrax.util.FormatUtils; @@ -12,7 +14,7 @@ public class SplitAndFilter { - private static final Logger logger = Logger.getLogger(SplitAndFilter.class.getName()); + private static final Logger LOG = LoggerFactory.getLogger(SplitAndFilter.class); @SuppressWarnings("unchecked") public static void main(String[] args) { @@ -32,15 +34,15 @@ public static void main(String[] args) { } if (grammar_file == null) { - logger.severe("No grammar specified."); + LOG.error("No grammar specified."); return; } if (filter_file == null) { - logger.severe("No filter file specified."); + LOG.error("No filter file specified."); return; } if (output_prefix == null) { - logger.severe("No output prefix specified."); + LOG.error("No output prefix specified."); return; } @@ -57,7 +59,7 @@ public static void main(String[] args) { } filter_reader.close(); } catch (IOException e) { - logger.severe(e.getMessage()); + LOG.error(e.getMessage()); } try { @@ -164,8 +166,7 @@ public static void main(String[] args) { } syn_count++; } catch (Exception e) { - logger.warning(e.getMessage()); - logger.warning(rule_line); + LOG.error("{} {}", rule_line, e.getMessage()); continue; } } @@ -174,11 +175,11 @@ public static void main(String[] args) { for (String word : stop_count.keySet()) stats_writer.write(word + "\t" + stop_count.get(word) + "\n"); - System.err.println("Total: \t" + (lex_count + phr_count + syn_count + drop_count)); - System.out.println("Dropped:\t" + drop_count); - System.out.println("Lexical:\t" + lex_count); - System.out.println("Phrasal:\t" + phr_count); - System.out.println("Syntactic:\t" + syn_count); + LOG.info("Total: \t{}", (lex_count + phr_count + syn_count + drop_count)); + LOG.info("Dropped:\t{}", drop_count); + LOG.info("Lexical:\t{}", lex_count); + LOG.info("Phrasal:\t{}", phr_count); + LOG.info("Syntactic:\t{}", syn_count); lex_writer.close(); phr_writer.close(); @@ -189,7 
+190,7 @@ public static void main(String[] args) { syn_self_writer.close(); stats_writer.close(); } catch (IOException e) { - logger.severe(e.getMessage()); + LOG.error(e.getMessage()); } } diff --git a/src/main/java/edu/jhu/thrax/util/ConfFileParser.java b/src/main/java/edu/jhu/thrax/util/ConfFileParser.java index cf43045..92111c3 100644 --- a/src/main/java/edu/jhu/thrax/util/ConfFileParser.java +++ b/src/main/java/edu/jhu/thrax/util/ConfFileParser.java @@ -1,6 +1,7 @@ package edu.jhu.thrax.util; import java.net.URI; +import java.nio.charset.Charset; import java.util.HashMap; import java.util.Map; import java.util.Scanner; @@ -22,11 +23,11 @@ public static Map parse(String confName) try { URI configURI = new URI(confName); String scheme = configURI.getScheme(); - if (scheme != null && (scheme.equalsIgnoreCase("s3n") || scheme.equalsIgnoreCase("s3"))) { - scanner = new Scanner(AmazonConfigFileLoader.getConfigStream(configURI)); + if (scheme != null && (scheme.equalsIgnoreCase("s3n") || scheme.equalsIgnoreCase("s3"))) { + scanner = new Scanner(AmazonConfigFileLoader.getConfigStream(configURI), "UTF-8"); } else { - scanner = new Scanner(DefaultConfigFileLoader.getConfigStream(configURI)); + scanner = new Scanner(DefaultConfigFileLoader.getConfigStream(configURI), "UTF-8"); } } catch (Exception e) { throw new IllegalArgumentException(e.toString()); diff --git a/src/main/java/edu/jhu/thrax/util/CreateGlueGrammar.java b/src/main/java/edu/jhu/thrax/util/CreateGlueGrammar.java index 6be3f0f..76f6d8b 100644 --- a/src/main/java/edu/jhu/thrax/util/CreateGlueGrammar.java +++ b/src/main/java/edu/jhu/thrax/util/CreateGlueGrammar.java @@ -3,10 +3,16 @@ import java.io.File; import java.io.IOException; import java.util.HashSet; +import java.util.Locale; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import edu.jhu.thrax.util.io.LineReader; public class CreateGlueGrammar { + + private static final Logger LOG = LoggerFactory.getLogger(CreateGlueGrammar.class); private 
static HashSet nts; // [GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1 @@ -28,12 +34,12 @@ public static void main(String[] argv) throws IOException { if (argv.length > 1) GOAL = argv[1]; if (grammar_file_name == null) { - System.err.println("No grammar specified."); + LOG.error("No grammar specified."); System.exit(1); } File grammar_file = new File(grammar_file_name); if (!grammar_file.exists()) { - System.err.println("Grammar file doesn't exist: " + grammar_file_name); + LOG.error("Grammar file doesn't exist: {}", grammar_file_name); System.exit(1); } @@ -51,7 +57,7 @@ public static void main(String[] argv) throws IOException { int lhsStart = line.indexOf("[") + 1; int lhsEnd = line.indexOf("]"); if (lhsStart < 1 || lhsEnd < 0) { - System.err.printf("malformed rule: %s\n", line); + LOG.error("malformed rule: {}\n", line); continue; } String lhs = line.substring(lhsStart, lhsEnd); @@ -59,12 +65,12 @@ public static void main(String[] argv) throws IOException { } } - System.out.println(String.format(R_START, GOAL)); + LOG.info(String.format(Locale.ROOT, R_START, GOAL)); for (String nt : nts) - System.out.println(String.format(R_TWO, GOAL, nt)); - System.out.println(String.format(R_END, GOAL)); + LOG.info(String.format(Locale.ROOT, R_TWO, GOAL, nt)); + LOG.info(String.format(Locale.ROOT, R_END, GOAL)); for (String nt : nts) - System.out.println(String.format(R_TOP, GOAL, nt)); + LOG.info(String.format(Locale.ROOT, R_TOP, GOAL, nt)); } } diff --git a/src/main/java/edu/jhu/thrax/util/FormatUtils.java b/src/main/java/edu/jhu/thrax/util/FormatUtils.java index c3a3e02..3a162ef 100644 --- a/src/main/java/edu/jhu/thrax/util/FormatUtils.java +++ b/src/main/java/edu/jhu/thrax/util/FormatUtils.java @@ -1,5 +1,6 @@ package edu.jhu.thrax.util; +import java.util.Locale; import java.util.Map; import java.util.regex.Pattern; @@ -23,7 +24,7 @@ public class FormatUtils { */ private static final String DELIMITER_REGEX = " \\|\\|\\| "; - public static final String DELIM = 
String.format(" %s ", DELIMITER); + public static final String DELIM = String.format(Locale.ROOT, " %s ", DELIMITER); public static final Pattern P_DELIM = Pattern.compile(DELIMITER_REGEX); public static final Pattern P_SPACE = Pattern.compile("\\s+"); @@ -130,10 +131,10 @@ public static Text ruleToText(RuleWritable r, Map fs, boolean if (value == -0.0 || Math.abs(value) < 0.000005) score = "0"; else - score = String.format("%.5f", value); + score = String.format(Locale.ROOT, "%.5f", value); if (sparse && Float.parseFloat(score) == 0) continue; } else if (val instanceof IntWritable) { - score = String.format("%d", ((IntWritable) fs.get(t)).get()); + score = String.format(Locale.ROOT, "%d", ((IntWritable) fs.get(t)).get()); if (sparse && Integer.parseInt(score) == 0) continue; } else if (val instanceof Text) { score = ((Text) fs.get(t)).toString(); @@ -144,9 +145,9 @@ public static Text ruleToText(RuleWritable r, Map fs, boolean throw new RuntimeException("Expecting float, integer, or string feature values."); } if (label) - sb.append(String.format("%s=%s ", t, score)); + sb.append(String.format(Locale.ROOT, "%s=%s ", t, score)); else - sb.append(String.format("%s ", score)); + sb.append(String.format(Locale.ROOT, "%s ", score)); } if (alignment != null) sb.append(DELIMITER + " ").append(alignment + " "); @@ -161,7 +162,7 @@ public static Text contextPhraseToText(Text phrase, Map fs) { sb.append(DELIM); for (Text t : fs.keySet()) { int i = fs.get(t); - if (i != 0) sb.append(String.format("%s=%d ", t, i)); + if (i != 0) sb.append(String.format(Locale.ROOT, "%s=%d ", t, i)); } return new Text(sb.substring(0, sb.length() - 1)); } diff --git a/src/main/java/edu/jhu/thrax/util/GrammarComparison.java b/src/main/java/edu/jhu/thrax/util/GrammarComparison.java index efe111f..2e7da90 100644 --- a/src/main/java/edu/jhu/thrax/util/GrammarComparison.java +++ b/src/main/java/edu/jhu/thrax/util/GrammarComparison.java @@ -6,21 +6,27 @@ import java.io.FileOutputStream; import 
java.io.IOException; import java.io.PrintStream; +import java.io.UnsupportedEncodingException; import java.util.HashSet; import java.util.Scanner; import java.util.Set; import java.util.zip.GZIPInputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public class GrammarComparison { + private static final Logger LOG = LoggerFactory.getLogger(GrammarComparison.class); + private static final String SEPARATOR = "|||"; private static final String USAGE = "usage: GrammarComparison "; public static void main(String [] argv) { if (argv.length < 3) { - System.err.println(USAGE); + LOG.error(USAGE); return; } @@ -51,13 +57,13 @@ public static void main(String [] argv) printRules(intersection, outputBase + ".both"); } catch (Exception e) { - e.printStackTrace(); + LOG.error(e.getMessage()); } return; } - private static void printRules(Set rules, String filename) throws FileNotFoundException, SecurityException { - PrintStream ps = new PrintStream(new FileOutputStream(filename)); + private static void printRules(Set rules, String filename) throws FileNotFoundException, SecurityException, UnsupportedEncodingException { + PrintStream ps = new PrintStream(new FileOutputStream(filename), true, "UTF-8"); for (String s : rules) ps.println(s); ps.close(); diff --git a/src/main/java/edu/jhu/thrax/util/HdfsUtils.java b/src/main/java/edu/jhu/thrax/util/HdfsUtils.java new file mode 100644 index 0000000..dcbfc60 --- /dev/null +++ b/src/main/java/edu/jhu/thrax/util/HdfsUtils.java @@ -0,0 +1,49 @@ +// Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+package edu.jhu.thrax.util; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +public class HdfsUtils { + + private HdfsUtils() {}; + + public static void writeObjectToFs(Configuration conf, E object, Path outPath) throws IOException { + FileSystem hdfs = FileSystem.get(conf); + + ObjectOutputStream oos = null; + try { + FSDataOutputStream out = hdfs.create(outPath); + oos = new ObjectOutputStream(out); + oos.writeObject(object); + } finally { + if (oos != null) { + oos.close(); + } + } + } + + public static E readObjectFromFs(Configuration conf, Path inPath) throws IOException,ClassNotFoundException { + FileSystem hdfs = FileSystem.get(conf); + + ObjectInputStream ois = null; + try { + FSDataInputStream in = hdfs.open(inPath); + ois = new ObjectInputStream(in); + @SuppressWarnings("unchecked") + E object = (E) ois.readObject(); + return object; + } finally { + if (ois != null) { + ois.close(); + } + } + } +} \ No newline at end of file diff --git a/src/main/java/edu/jhu/thrax/util/Intersect.java b/src/main/java/edu/jhu/thrax/util/Intersect.java index 4b65791..b0397ee 100644 --- a/src/main/java/edu/jhu/thrax/util/Intersect.java +++ b/src/main/java/edu/jhu/thrax/util/Intersect.java @@ -38,8 +38,8 @@ public static void main(String [] argv) throws Exception scanner = new Scanner(new GZIPInputStream(new FileInputStream(new File(file2))), "UTF-8"); else scanner = new Scanner(new File(file2), "UTF-8"); - PrintStream firstGrammar = new PrintStream(new FileOutputStream(outputPrefix + ".1")); - PrintStream secondGrammar = new PrintStream(new FileOutputStream(outputPrefix + ".2")); + PrintStream firstGrammar = new PrintStream(new FileOutputStream(outputPrefix + ".1"), true, "UTF-8"); + PrintStream 
secondGrammar = new PrintStream(new FileOutputStream(outputPrefix + ".2"), true, "UTF-8"); while (scanner.hasNextLine()) { String s = scanner.nextLine(); String r = repr(s); diff --git a/src/main/java/edu/jhu/thrax/util/TestSetFilter.java b/src/main/java/edu/jhu/thrax/util/TestSetFilter.java index fa60737..4293cb3 100644 --- a/src/main/java/edu/jhu/thrax/util/TestSetFilter.java +++ b/src/main/java/edu/jhu/thrax/util/TestSetFilter.java @@ -5,7 +5,9 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; +import java.io.OutputStreamWriter; import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -18,10 +20,14 @@ import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public class TestSetFilter { private List testSentences; private Map> sentencesByWord; private Set ngrams; + private static final Logger LOG = LoggerFactory.getLogger(TestSetFilter.class); // for caching of accepted rules private String lastSourceSide; @@ -68,10 +74,10 @@ private void getTestSentences(String filename) { testSentences.add(line); } } catch (FileNotFoundException e) { - System.err.printf("Could not open %s\n", e.getMessage()); + LOG.error("Could not open {}\n", e.getMessage()); } - if (verbose) System.err.println("Added " + testSentences.size() + " sentences.\n"); + if (verbose) LOG.info("Added {} sentences.\n", testSentences.size()); ngrams = getTestNGrams(testSentences); } @@ -108,8 +114,8 @@ public void setSentence(String sentence) { public void filterGrammarToFile(String fullGrammarFile, String sentence, String filteredGrammarFile, boolean fast) { - System.err.println(String.format("filterGrammarToFile(%s,%s,%s,%s)\n", fullGrammarFile, - sentence, filteredGrammarFile, (fast ? 
"fast" : "exact"))); + LOG.error("filterGrammarToFile({},{},{},{})\n", fullGrammarFile, + sentence, filteredGrammarFile, (fast ? "fast" : "exact")); this.fast = fast; setSentence(sentence); @@ -121,21 +127,22 @@ public void filterGrammarToFile(String fullGrammarFile, String sentence, int rulesIn = 0; int rulesOut = 0; boolean verbose = false; - if (verbose) System.err.println("Processing rules..."); + if (verbose) LOG.info("Processing rules..."); PrintWriter out = - new PrintWriter(new GZIPOutputStream(new FileOutputStream(filteredGrammarFile))); + new PrintWriter( + new OutputStreamWriter( + new GZIPOutputStream( + new FileOutputStream(filteredGrammarFile)), StandardCharsets.UTF_8)); // iterate over all lines in the grammar while (scanner.hasNextLine()) { if (verbose) { if ((rulesIn + 1) % 2000 == 0) { - System.err.print("."); - System.err.flush(); + LOG.info("."); } if ((rulesIn + 1) % 100000 == 0) { - System.err.println(" [" + (rulesIn + 1) + "]"); - System.err.flush(); + LOG.info(" [{}] ", (rulesIn + 1) ); } } rulesIn++; @@ -149,14 +156,14 @@ public void filterGrammarToFile(String fullGrammarFile, String sentence, out.close(); if (verbose) { - System.err.println("[INFO] Total rules read: " + rulesIn); - System.err.println("[INFO] Rules kept: " + rulesOut); - System.err.println("[INFO] Rules dropped: " + (rulesIn - rulesOut)); + LOG.info("[INFO] Total rules read: {}", rulesIn); + LOG.info("[INFO] Rules kept: {}", rulesOut); + LOG.info("[INFO] Rules dropped: {}", (rulesIn - rulesOut)); } } catch (FileNotFoundException e) { - System.err.printf("* FATAL: could not open %s\n", e.getMessage()); + LOG.error("* FATAL: could not open {}\n", e.getMessage()); } catch (IOException e) { - System.err.printf("* FATAL: could not write to %s\n", e.getMessage()); + LOG.error("* FATAL: could not write to {}\n", e.getMessage()); } } @@ -310,11 +317,12 @@ private static String createNGram(String[] tokens, int start, int order) { public static void main(String[] argv) { // do some 
setup if (argv.length < 1) { - System.err.println("usage: TestSetFilter [-v|-p|-f|-n N] [test set2 ...]"); - System.err.println(" -v verbose output"); - System.err.println(" -p parallel compatibility"); - System.err.println(" -f fast mode"); - System.err.println(" -n max n-gram to compare to (default 12)"); + String usage = ("usage: TestSetFilter [-v|-p|-f|-n N] [test set2 ...]\n" + + " -v verbose output\n" + + " -p parallel compatibility\n" + + " -f fast mode\n" + + " -n max n-gram to compare to (default 12)\n"); + LOG.error(usage); return; } @@ -343,38 +351,35 @@ public static void main(String[] argv) { int rulesIn = 0; int rulesOut = 0; if (filter.verbose) { - System.err.println("Processing rules..."); - if (filter.fast) System.err.println("Using fast version..."); - System.err.println("Using at max " + filter.RULE_LENGTH + " n-grams..."); + LOG.info("Processing rules..."); + if (filter.fast) LOG.info("Using fast version..."); + LOG.info("Using at max {} n-grams...", filter.RULE_LENGTH); } while (scanner.hasNextLine()) { if (filter.verbose) { if ((rulesIn + 1) % 2000 == 0) { - System.err.print("."); - System.err.flush(); + LOG.info("."); } if ((rulesIn + 1) % 100000 == 0) { - System.err.println(" [" + (rulesIn + 1) + "]"); - System.err.flush(); + LOG.info(" [{}]", (rulesIn + 1)); } } rulesIn++; String rule = scanner.nextLine(); if (filter.inTestSet(rule)) { - System.out.println(rule); - if (filter.parallel) System.out.flush(); + LOG.info(rule); + if (filter.parallel); rulesOut++; } else if (filter.parallel) { - System.out.println(""); - System.out.flush(); + LOG.info(""); } } if (filter.verbose) { - System.err.println("[INFO] Total rules read: " + rulesIn); - System.err.println("[INFO] Rules kept: " + rulesOut); - System.err.println("[INFO] Rules dropped: " + (rulesIn - rulesOut)); - System.err.println("[INFO] cached queries: " + filter.cached); + LOG.info("[INFO] Total rules read: {}", rulesIn); + LOG.info("[INFO] Rules kept: {}", rulesOut); + LOG.info("[INFO] 
Rules dropped: {}", (rulesIn - rulesOut)); + LOG.info("[INFO] cached queries: {}", filter.cached); } return; diff --git a/src/main/java/edu/jhu/thrax/util/io/LineReader.java b/src/main/java/edu/jhu/thrax/util/io/LineReader.java index 77d6f50..a616dae 100644 --- a/src/main/java/edu/jhu/thrax/util/io/LineReader.java +++ b/src/main/java/edu/jhu/thrax/util/io/LineReader.java @@ -25,15 +25,19 @@ import java.util.NoSuchElementException; import java.util.zip.GZIPInputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + /** * This class provides an Iterator interface to a BufferedReader. This covers the most common * use-cases for reading from files without ugly code to check whether we got a line or not. * * @author wren ng thornton - * @version $LastChangedDate: 2009-03-26 15:06:57 -0400 (Thu, 26 Mar 2009) $ */ public class LineReader implements Reader { + private static final Logger LOG = LoggerFactory.getLogger(LineReader.class); + /* * Note: charset name is case-agnostic "UTF-8" is the canonical name "UTF8", "unicode-1-1-utf-8" * are aliases Java doesn't distinguish utf8 vs UTF-8 like Perl does @@ -282,7 +286,7 @@ public int countLines() throws IOException { /** Example usage code. */ public static void main(String[] args) { if (1 != args.length) { - System.out.println("Usage: java LineReader filename"); + LOG.error("Usage: java LineReader filename"); System.exit(1); } @@ -292,7 +296,7 @@ public static void main(String[] args) { try { for (String line : in) { - System.out.println(line); + LOG.info(line); } } finally { @@ -300,7 +304,7 @@ public static void main(String[] args) { } } catch (IOException e) { - e.printStackTrace(); + LOG.error(e.getMessage()); } } } diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties new file mode 100644 index 0000000..eb52bc6 --- /dev/null +++ b/src/main/resources/log4j.properties @@ -0,0 +1,65 @@ +# Define some default values that can be overridden by system properties +hadoop.log.dir=. 
+hadoop.log.file=hadoop.log + +# RootLogger - DailyRollingFileAppender +log4j.rootLogger=INFO,DRFA + +# Logging Threshold +log4j.threshold=ALL + +#special logging requirements for some commandline tools... + +log4j.logger.org.apache.hadoop=WARN + +# +# Daily Rolling File Appender +# + +log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} + +# Rollover at midnight +log4j.appender.DRFA.DatePattern=.yyyy-MM-dd + +# 30-day backup +#log4j.appender.DRFA.MaxBackupIndex=30 +log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout + +# Pattern format: Date LogLevel LoggerName LogMessage +log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n +# Debugging Pattern format: Date LogLevel LoggerName (FileName:MethodName:LineNo) LogMessage +#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + +# +# stdout +# Add *stdout* to rootlogger above if you want to use this +# + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + +# +# plain layout used for commandline tools to output to console +# +log4j.appender.cmdstdout=org.apache.log4j.ConsoleAppender +log4j.appender.cmdstdout.layout=org.apache.log4j.PatternLayout +log4j.appender.cmdstdout.layout.ConversionPattern=%m%n + +# +# Rolling File Appender +# + +#log4j.appender.RFA=org.apache.log4j.RollingFileAppender +#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} + +# Logfile size and 30-day backups +#log4j.appender.RFA.MaxFileSize=1MB +#log4j.appender.RFA.MaxBackupIndex=30 + +#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout +#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n +#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +