-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathsanitize.rb
More file actions
46 lines (38 loc) · 1.84 KB
/
sanitize.rb
File metadata and controls
46 lines (38 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
require 'htmlentities'
require 'sanitize'
# Shared HTMLEntities decoder. Starts out nil and is created on first use by translateEntities.
$htmlCoder = nil
###################################################################################################
# Decode HTML entities in a fragment, e.g. "&rsquo;" becomes a curly quote and "&eacute;" becomes
# an e with acute accent. Returns whatever the HTMLEntities decoder produces for the input.
def translateEntities(htmlFragment)
  # The decoder is built once, on first call, and kept in a global for reuse.
  $htmlCoder ||= HTMLEntities.new
  $htmlCoder.decode(htmlFragment)
end
###################################################################################################
# The first line of defense against unwanted or unsafe HTML is the WYSIWYG editor's built-in
# filtering. However, since this is an API we cannot rely on that. This is the second line of
# defense.
#
# Steps, in order:
#   1. Rewrite absolute escholarship.org href/src URLs as site-relative paths.
#   2. Decode HTML entities (only when the text contains "&").
#   3. Rename the tags super, subscript, italic and bold to sup, sub, i and b.
#   4. Pass the result through Sanitize with a whitelist of elements, attributes and protocols.
#
# @param htmlFragment [String, nil] HTML to clean; it is never modified in place
# @return [String, nil] sanitized HTML with surrounding whitespace stripped, or nil for nil input
def sanitizeHTML(htmlFragment)
  return if htmlFragment.nil?

  # Shorten escholarship.org links to top-level relative. A non-destructive gsub is used so the
  # caller's string is left untouched and frozen strings are accepted. The dot in the host name
  # is escaped, and the lookahead requires the host to end right there (at "/", "?", "#" or the
  # closing quote) so look-alike hosts such as "escholarship.org.example.com" are not rewritten.
  html = htmlFragment.gsub(
    %r{ (href|src)="https?://(?:pub-jschol[^."]+\.|www\.|beta\.)?escholarship\.org(?=[/?#"])/?([^"]*)"},
    ' \1="/\2"'
  )

  # Translate all entities. Skipped when there is no "&", since then there is nothing to decode.
  html = translateEntities(html) if html.include?('&')

  if html.include?('<')
    # Normalize tag names (opening and closing forms) in a single pass.
    renamed = { 'super' => 'sup', 'subscript' => 'sub', 'italic' => 'i', 'bold' => 'b' }
    html = html.gsub(%r{<(/?)(super|subscript|italic|bold)>}) do
      "<#{Regexp.last_match(1)}#{renamed[Regexp.last_match(2)]}>"
    end
  end

  # Sanitize the result
  Sanitize.fragment(html,
    elements: %w{b em i strong u} +
              %w{a br li ol p blockquote h1 h2 h3 h4 small strike sub sup ul hr img},
    attributes: { "a" => ['href'], "img" => ['src', 'alt'] },
    protocols: { "a" => {'href' => ['ftp', 'http', 'https', 'mailto', :relative]},
                 "img" => {'src' => ['http', 'https', :relative]} }
  ).strip
end