Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .ruby-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ruby-3.3.0
57 changes: 31 additions & 26 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,37 +1,42 @@
GEM
remote: http://rubygems.org/
specs:
addressable (2.4.0)
diff-lcs (1.2.5)
docile (1.1.5)
addressable (2.8.7)
public_suffix (>= 2.0.2, < 7.0)
diff-lcs (1.5.1)
docile (1.4.0)
domainatrix (0.0.11)
addressable
hoe (3.14.2)
rake (>= 0.8, < 11.0)
json (1.8.3)
hoe (4.2.2)
rake (>= 0.8, < 15.0)
one_hundred_percent_coverage (0.0.2)
simplecov (>= 0.3.7)
rake (10.5.0)
rdoc (4.2.2)
json (~> 1.4)
rspec (3.4.0)
rspec-core (~> 3.4.0)
rspec-expectations (~> 3.4.0)
rspec-mocks (~> 3.4.0)
rspec-core (3.4.3)
rspec-support (~> 3.4.0)
rspec-expectations (3.4.0)
psych (5.1.2)
stringio
public_suffix (6.0.1)
rake (13.2.1)
rdoc (6.7.0)
psych (>= 4.0.0)
rspec (3.13.0)
rspec-core (~> 3.13.0)
rspec-expectations (~> 3.13.0)
rspec-mocks (~> 3.13.0)
rspec-core (3.13.0)
rspec-support (~> 3.13.0)
rspec-expectations (3.13.1)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.4.0)
rspec-mocks (3.4.1)
rspec-support (~> 3.13.0)
rspec-mocks (3.13.1)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.4.0)
rspec-support (3.4.1)
simplecov (0.11.2)
docile (~> 1.1.0)
json (~> 1.8)
simplecov-html (~> 0.10.0)
simplecov-html (0.10.0)
rspec-support (~> 3.13.0)
rspec-support (3.13.1)
simplecov (0.22.0)
docile (~> 1.1)
simplecov-html (~> 0.11)
simplecov_json_formatter (~> 0.1)
simplecov-html (0.12.3)
simplecov_json_formatter (0.1.4)
stringio (3.1.1)

PLATFORMS
ruby
Expand All @@ -45,4 +50,4 @@ DEPENDENCIES
simplecov

BUNDLED WITH
1.11.2
2.5.10
2 changes: 1 addition & 1 deletion README.rdoc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
= Despamilator

* http://github.com/moowahaha/despamilator
* home :: http://github.com/moowahaha/despamilator

== DESCRIPTION:

Expand Down
1 change: 1 addition & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Hoe.plugin :newgem
# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
# Declare the 'despamilator' gem specification through Hoe and keep the result in the global $hoe.
$hoe = Hoe.spec 'despamilator' do
# Take the gem version from the Despamilator::VERSION constant.
self.version = Despamilator::VERSION
self.developer 'Stephen Hardisty', '[email protected]'
# NOTE(review): this assigns the literal string 'PostInstall.txt'; presumably a plugin
# substitutes the file's contents - confirm against the Hoe/newgem setup.
self.post_install_message = 'PostInstall.txt'
end
Expand Down
2 changes: 2 additions & 0 deletions lib/despamilator.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
require 'despamilator/version'
require 'despamilator/filter'
Dir.glob(File.join(File.dirname(__FILE__), 'despamilator', 'filter', '*.rb')).each do |filter_file|
require filter_file
end

require 'despamilator/subject'
require 'despamilator/text'
require 'ostruct'

#== SYNOPSIS:
Expand Down
4 changes: 2 additions & 2 deletions lib/despamilator/filter/html_tags.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ def parse subject
text = subject.text.downcase

html_tags.each do |tag|
opening_elements = text.count(/<\s*#{tag}\W/)
closing_elements = text.count(/\W#{tag}\s*\/>/)
opening_elements = Despamilator::Text.count(text, /<\s*#{tag}\W/)
closing_elements = Despamilator::Text.count(text, /\W#{tag}\s*\/>/)

if opening_elements > 0 or closing_elements > 0
safest_element_count = opening_elements > closing_elements ? opening_elements : closing_elements
Expand Down
2 changes: 1 addition & 1 deletion lib/despamilator/filter/ip_address_url.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def description
def parse subject
subject.register_match!({
:score => 0.5, :filter => self
}) if subject.text.downcase.count(/http:\/\/\d+\.\d+\.\d+\.\d+/) > 0
}) if Despamilator::Text.count(subject.text.downcase, /http:\/\/\d+\.\d+\.\d+\.\d+/) > 0
end

end
Expand Down
5 changes: 4 additions & 1 deletion lib/despamilator/filter/long_words.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ def description
end

def parse subject
subject.text.without_uris.words.each do |word|
without_uris = Despamilator::Text.without_uris(subject.text)
words = Despamilator::Text.words(without_uris)

words.each do |word|
subject.register_match!({
:score => 0.1, :filter => self
}) if word.length > 20
Expand Down
9 changes: 6 additions & 3 deletions lib/despamilator/filter/mixed_case.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ def description
end

def parse subject
text = subject.text.without_uris
count = text.remove_and_count!(/[a-z][A-Z]/)
count += text.remove_and_count!(/[a-z][A-Z][a-z]/)
text = Despamilator::Text.without_uris(subject.text)
text, count1 = Despamilator::Text.remove_and_count(text, /[a-z][A-Z]/)
_, count2 = Despamilator::Text.remove_and_count(text, /[a-z][A-Z][a-z]/)

count = count1 + count2

subject.register_match!({:score => 0.1 * count, :filter => self}) if count > 0
end

Expand Down
2 changes: 1 addition & 1 deletion lib/despamilator/filter/numbers_and_words.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def description
private

def tidy_text subject
text = subject.text.without_uris
text = Despamilator::Text.without_uris(subject.text)
text.downcase!

# strip out "good numbers"
Expand Down
4 changes: 2 additions & 2 deletions lib/despamilator/filter/obfuscated_urls.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def description
end

def parse subject
text = subject.text.without_uris.downcase
text = Despamilator::Text.without_uris(subject.text.downcase)
count = find_space_separated_parts text
count += find_space_separated_characters text

Expand All @@ -21,7 +21,7 @@ def parse subject
private

def find_space_separated_parts text
text.count(/www\s+\w+\s+com/)
Despamilator::Text.count(text, /www\s+\w+\s+com/)
end

def find_space_separated_characters text
Expand Down
2 changes: 1 addition & 1 deletion lib/despamilator/filter/prices.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def description
end

def parse subject
price_count = subject.text.count(/\$\s*\d+/)
price_count = Despamilator::Text.count(subject.text,/\$\s*\d+/)
subject.register_match!({:score => 0.075 * price_count, :filter => self}) if price_count > 0
end

Expand Down
2 changes: 1 addition & 1 deletion lib/despamilator/filter/shouting.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def parse subject
return if text.length < 20

uppercased = text.scan(/[A-Z][A-Z]+/).join.length
lowercased = text.count(/[a-z]/)
lowercased = Despamilator::Text.count(text, /[a-z]/)

if uppercased > 0
subject.register_match!({
Expand Down
2 changes: 1 addition & 1 deletion lib/despamilator/filter/spammy_tlds.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def description
end

def parse subject
matches = subject.text.count(/\w{5,}\.(info|biz|xxx)\b/)
matches = Despamilator::Text.count(subject.text, /\w{5,}\.(info|biz|xxx)\b/)
subject.register_match!({:score => 0.05 * matches, :filter => self}) if matches > 0
end

Expand Down
2 changes: 1 addition & 1 deletion lib/despamilator/filter/trailing_number.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def description
end

def parse subject
subject.register_match!({:score => 0.1, :filter => self}) if subject.text.without_uris =~ /\b\d+\s*$/
subject.register_match!({:score => 0.1, :filter => self}) if Despamilator::Text.without_uris(subject.text) =~ /\b\d+\s*$/
end

end
Expand Down
2 changes: 1 addition & 1 deletion lib/despamilator/filter/unusual_characters.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def description

def parse subject
initialize_combos
tokenize(subject.text.without_uris).each do |token|
tokenize(Despamilator::Text.without_uris(subject.text)).each do |token|
subject.register_match!({:score => 0.05, :filter => self}) if @@combos[token.to_sym]
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/despamilator/filter/urls.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def description

def parse subject
text = subject.text.downcase.gsub(/http:\/\/\d+\.\d+\.\d+\.\d+/, '')
matches = text.count(/https?:\/\//)
matches = Despamilator::Text.count(text, /https?:\/\/\S+/)
1.upto(matches > 2 ? 2 : matches) do
subject.register_match!({:score => 0.4, :filter => self})
end
Expand Down
19 changes: 11 additions & 8 deletions lib/despamilator/filter/weird_punctuation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,21 @@ def description
end

def parse subject
text = subject.text.without_uris.downcase
text = Despamilator::Text.without_uris(subject.text).downcase

text.gsub!(/\w&\w/, 'xx')
text.gsub!(/[a-z](!|\?)(\s|$)/, 'x')
text.gsub!(/(?:#{punctuation}){20,}/, '')
matches = text.remove_and_count!(/(?:\W|\s|^)(#{punctuation})/)
matches += text.remove_and_count!(/\w,\w/)
matches += text.remove_and_count!(/\w\w\.\w/)
matches += text.remove_and_count!(/\w\.\w\w/)
matches += text.remove_and_count!(/(#{punctuation})(#{punctuation})/)
matches += text.remove_and_count!(/(#{punctuation})$/)
matches += text.remove_and_count!(/(?:\W|\s|^)\d+(#{punctuation})/)

text, matches1 = Despamilator::Text.remove_and_count(text, /(?:\W|\s|^)(#{punctuation})/)
text, matches2 = Despamilator::Text.remove_and_count(text, /\w,\w/)
text, matches3 = Despamilator::Text.remove_and_count(text, /\w\w\.\w/)
text, matches4 = Despamilator::Text.remove_and_count(text, /\w\.\w\w/)
text, matches5 = Despamilator::Text.remove_and_count(text, /(#{punctuation})(#{punctuation})/)
text, matches6 = Despamilator::Text.remove_and_count(text, /(#{punctuation})$/)
_, matches7 = Despamilator::Text.remove_and_count(text, /(?:\W|\s|^)\d+(#{punctuation})/)

matches = matches1 + matches2 + matches3 + matches4 + matches5 + matches6 + matches7

subject.register_match!({:score => 0.03 * matches, :filter => self}) if matches > 0
end
Expand Down
4 changes: 1 addition & 3 deletions lib/despamilator/subject.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
require 'despamilator/subject/text'

class Despamilator
class Subject
attr_reader :score, :text

def initialize text
@score = 0.0
@matches = {}
@text = Despamilator::Subject::Text.new(text)
@text = text
end

def register_match! details
Expand Down
32 changes: 0 additions & 32 deletions lib/despamilator/subject/text.rb

This file was deleted.

22 changes: 22 additions & 0 deletions lib/despamilator/text.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
require "uri"

class Despamilator
  # Stateless text helpers. Every method receives the text as an argument and
  # returns a new value; the string passed in is never modified.
  module Text
    class << self
      # Returns a copy of +text+ with http(s), mailto and ftp URIs removed
      # (scheme match is case-insensitive). The whitespace character that
      # terminates a URI is removed together with it.
      def without_uris(text)
        uri_pattern = /\b(?:https?|mailto|ftp):.+?(\s|$)/i
        text.gsub(uri_pattern, "")
      end

      # Splits +text+ on runs of non-word characters and returns the pieces.
      def words(text)
        text.split(/\W+/)
      end

      # Counts the non-nil values String#scan yields for +pattern+.
      #
      # Without capture groups that is one per match. With capture groups,
      # every participating group of every match is counted, so a pattern
      # with two groups contributes two per match and a group that did not
      # participate contributes nothing.
      def count(text, pattern)
        scanned = text.scan(pattern)
        scanned.flatten.compact.size
      end

      # Returns a two-element array: +text+ with every match of +pattern+
      # deleted, followed by the count (see #count) taken on the original text.
      def remove_and_count(text, pattern)
        occurrences = count(text, pattern)
        stripped = text.gsub(pattern, "")
        [stripped, occurrences]
      end
    end
  end
end
4 changes: 3 additions & 1 deletion spec/despamilator_spec.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
require "helpers/spec_helper"

describe Despamilator do

before do
Expand All @@ -15,7 +17,7 @@
context :matched_by do

before do
@dspam.should_receive(:warn).with(/matched_by is deprecated/)
expect(@dspam).to receive(:warn).with(/matched_by is deprecated/)
@gtubs = @dspam.matched_by { |f| f.class == DespamilatorFilter::GtubsTestFilter }.collect.first
end

Expand Down
6 changes: 6 additions & 0 deletions spec/helpers/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,9 @@
Dir.glob(File.join(File.dirname(__FILE__), '*.rb')).each do |file|
require file
end

# Enable both the :should and :expect expectation syntaxes for the spec suite.
RSpec.configure do |rspec_config|
  rspec_config.expect_with(:rspec) do |expectations|
    expectations.syntax = %i[should expect]
  end
end
Loading