diff --git a/.gitignore b/.gitignore index 81028cd2..a21e2cb6 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ target # IntelliJ files *.iml -.idea \ No newline at end of file +.idea +tmp/ \ No newline at end of file diff --git a/pom.xml b/pom.xml index ee1ae63f..24edcdd6 100644 --- a/pom.xml +++ b/pom.xml @@ -1,155 +1,185 @@ - - 4.0.0 - - com.vdurmont - emoji-java - 5.1.1 - jar - - emoji-java - https://github.com/vdurmont/emoji-java - The missing emoji library for Java. - - - scm:git:git@github.com:vdurmont/emoji-java.git - scm:git:git@github.com:vdurmont/emoji-java.git - git@github.com:vdurmont/emoji-java.git - - - - - Vincent DURMONT - vdurmont@gmail.com - http://www.vincent-durmont.com - - - - - - The MIT License - http://www.opensource.org/licenses/mit-license.php - repo - - - - - UTF-8 - - - - - org.json - json - 20170516 - - - - junit - junit - 4.13 - test - - - - - - - - org.codehaus.mojo - cobertura-maven-plugin - 2.5.2 - - xml - 256m - true - - - - org.eluder.coveralls - coveralls-maven-plugin - 2.2.0 - - - - - - - - release - - - - org.apache.maven.plugins - maven-source-plugin - 2.3 - - - attach-sources - - jar-no-fork - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - 2.9.1 - - UTF-8 - UTF-8 - public - - - - attach-javadocs - - jar - - - - - - org.apache.maven.plugins - maven-gpg-plugin - 1.6 - - - sign-artifacts - verify - - sign - - - - - - org.sonatype.plugins - nexus-staging-maven-plugin - 1.6.7 - true - - ossrh - https://oss.sonatype.org/ - true - - - - - - - - - - ossrh - Sonatype Nexus Snapshots - https://oss.sonatype.org/content/repositories/snapshots/ - - - ossrh - Nexus Release Repository - https://oss.sonatype.org/service/local/staging/deploy/maven2/ - - - + + 4.0.0 + + com.vdurmont + emoji-java + 5.1.1 + jar + + emoji-java + https://github.com/vdurmont/emoji-java + The missing emoji library for Java. 
+ + + scm:git:git@github.com:vdurmont/emoji-java.git + scm:git:git@github.com:vdurmont/emoji-java.git + git@github.com:vdurmont/emoji-java.git + + + + + Vincent DURMONT + vdurmont@gmail.com + http://www.vincent-durmont.com + + + + + + The MIT License + http://www.opensource.org/licenses/mit-license.php + repo + + + + + UTF-8 + 17 + 17 + + + + + org.json + json + 20170516 + + + + junit + junit + 4.13 + test + + + + + + + + org.jacoco + jacoco-maven-plugin + 0.8.8 + + + org.codehaus.mojo + cobertura-maven-plugin + 2.5.2 + + xml + 256m + true + + + + org.eluder.coveralls + coveralls-maven-plugin + 2.2.0 + + + + + + org.jacoco + jacoco-maven-plugin + + + + prepare-agent + + + + report + test + + report + + + + + + + + + + + + + release + + + + org.apache.maven.plugins + maven-source-plugin + 2.3 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.9.1 + + UTF-8 + UTF-8 + public + + + + attach-javadocs + + jar + + + + + + org.apache.maven.plugins + maven-gpg-plugin + 1.6 + + + sign-artifacts + verify + + sign + + + + + + org.sonatype.plugins + nexus-staging-maven-plugin + 1.6.7 + true + + ossrh + https://oss.sonatype.org/ + true + + + + + + + + + + ossrh + Sonatype Nexus Snapshots + https://oss.sonatype.org/content/repositories/snapshots/ + + + ossrh + Nexus Release Repository + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + + \ No newline at end of file diff --git a/src/main/java/com/vdurmont/emoji/Emoji.java b/src/main/java/com/vdurmont/emoji/Emoji.java index dfb6924f..9b78ce3f 100644 --- a/src/main/java/com/vdurmont/emoji/Emoji.java +++ b/src/main/java/com/vdurmont/emoji/Emoji.java @@ -1,214 +1,230 @@ -package com.vdurmont.emoji; - -import java.io.UnsupportedEncodingException; -import java.util.Collections; -import java.util.List; - -/** - * This class represents an emoji.
- *
- * This object is immutable so it can be used safely in a multithreaded context. - * - * @author Vincent DURMONT [vdurmont@gmail.com] - */ -public class Emoji { - private final String description; - private final boolean supportsFitzpatrick; - private final List aliases; - private final List tags; - private final String unicode; - private final String htmlDec; - private final String htmlHex; - - /** - * Constructor for the Emoji. - * - * @param description The description of the emoji - * @param supportsFitzpatrick Whether the emoji supports Fitzpatrick modifiers - * @param aliases the aliases for this emoji - * @param tags the tags associated with this emoji - * @param bytes the bytes that represent the emoji - */ - protected Emoji( - String description, - boolean supportsFitzpatrick, - List aliases, - List tags, - byte... bytes - ) { - this.description = description; - this.supportsFitzpatrick = supportsFitzpatrick; - this.aliases = Collections.unmodifiableList(aliases); - this.tags = Collections.unmodifiableList(tags); - - int count = 0; - try { - this.unicode = new String(bytes, "UTF-8"); - int stringLength = getUnicode().length(); - String[] pointCodes = new String[stringLength]; - String[] pointCodesHex = new String[stringLength]; - - for (int offset = 0; offset < stringLength; ) { - final int codePoint = getUnicode().codePointAt(offset); - - pointCodes[count] = String.format("&#%d;", codePoint); - pointCodesHex[count++] = String.format("&#x%x;", codePoint); - - offset += Character.charCount(codePoint); - } - this.htmlDec = stringJoin(pointCodes, count); - this.htmlHex = stringJoin(pointCodesHex, count); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } - } - - /** - * Method to replace String.join, since it was only introduced in java8 - * @param array the array to be concatenated - * @return concatenated String - */ - private String stringJoin(String[] array, int count){ - String joined = ""; - for(int i = 0; i < count; i++) 
- joined += array[i]; - return joined; - } - - /** - * Returns the description of the emoji - * - * @return the description - */ - public String getDescription() { - return this.description; - } - - /** - * Returns wether the emoji supports the Fitzpatrick modifiers or not - * - * @return true if the emoji supports the Fitzpatrick modifiers - */ - public boolean supportsFitzpatrick() { - return this.supportsFitzpatrick; - } - - /** - * Returns the aliases of the emoji - * - * @return the aliases (unmodifiable) - */ - public List getAliases() { - return this.aliases; - } - - /** - * Returns the tags of the emoji - * - * @return the tags (unmodifiable) - */ - public List getTags() { - return this.tags; - } - - /** - * Returns the unicode representation of the emoji - * - * @return the unicode representation - */ - public String getUnicode() { - return this.unicode; - } - - /** - * Returns the unicode representation of the emoji associated with the - * provided Fitzpatrick modifier.
- * If the modifier is null, then the result is similar to - * {@link Emoji#getUnicode()} - * - * @param fitzpatrick the fitzpatrick modifier or null - * - * @return the unicode representation - * @throws UnsupportedOperationException if the emoji doesn't support the - * Fitzpatrick modifiers - */ - public String getUnicode(Fitzpatrick fitzpatrick) { - if (!this.supportsFitzpatrick()) { - throw new UnsupportedOperationException( - "Cannot get the unicode with a fitzpatrick modifier, " + - "the emoji doesn't support fitzpatrick." - ); - } else if (fitzpatrick == null) { - return this.getUnicode(); - } - return this.getUnicode() + fitzpatrick.unicode; - } - - /** - * Returns the HTML decimal representation of the emoji - * - * @return the HTML decimal representation - */ - public String getHtmlDecimal() { - return this.htmlDec; - } - - /** - * @deprecated identical to {@link #getHtmlHexadecimal()} for - * backwards-compatibility. Use that instead. - * - * @return the HTML hexadecimal representation - */ - public String getHtmlHexidecimal() { - return this.getHtmlHexadecimal(); - } - - /** - * Returns the HTML hexadecimal representation of the emoji - * - * @return the HTML hexadecimal representation - */ - public String getHtmlHexadecimal() { - return this.htmlHex; - } - - @Override - public boolean equals(Object other) { - return !(other == null || !(other instanceof Emoji)) && - ((Emoji) other).getUnicode().equals(getUnicode()); - } - - @Override - public int hashCode() { - return unicode.hashCode(); - } - - /** - * Returns the String representation of the Emoji object.
- *
- * Example:
- * Emoji { - * description='smiling face with open mouth and smiling eyes', - * supportsFitzpatrick=false, - * aliases=[smile], - * tags=[happy, joy, pleased], - * unicode='šŸ˜„', - * htmlDec='&#128516;', - * htmlHex='&#x1f604;' - * } - * - * @return the string representation - */ - @Override - public String toString() { - return "Emoji{" + - "description='" + description + '\'' + - ", supportsFitzpatrick=" + supportsFitzpatrick + - ", aliases=" + aliases + - ", tags=" + tags + - ", unicode='" + unicode + '\'' + - ", htmlDec='" + htmlDec + '\'' + - ", htmlHex='" + htmlHex + '\'' + - '}'; - } -} +package com.vdurmont.emoji; + +import java.io.UnsupportedEncodingException; +import java.util.Collections; +import java.util.List; + +/** + * This class represents an emoji.
+ *
+ * This object is immutable so it can be used safely in a multithreaded context. + * + * @author Vincent DURMONT [vdurmont@gmail.com] + */ +public class Emoji { + private final String description; + private final boolean supportsFitzpatrick; + private final List aliases; + private final List tags; + private final String unicode; + private final String htmlDec; + private final String htmlHex; + + /** + * Constructor for the Emoji. + * + * @param description The description of the emoji + * @param supportsFitzpatrick Whether the emoji supports Fitzpatrick modifiers + * @param aliases the aliases for this emoji + * @param tags the tags associated with this emoji + * @param bytes the bytes that represent the emoji + */ + protected Emoji( + String description, + boolean supportsFitzpatrick, + List aliases, + List tags, + byte... bytes + ) { + this.description = description; + this.supportsFitzpatrick = supportsFitzpatrick; + this.aliases = Collections.unmodifiableList(aliases); + this.tags = Collections.unmodifiableList(tags); + + int count = 0; + try { + this.unicode = new String(bytes, "UTF-8"); + int stringLength = getUnicode().length(); + String[] pointCodes = new String[stringLength]; + String[] pointCodesHex = new String[stringLength]; + + for (int offset = 0; offset < stringLength; ) { + final int codePoint = getUnicode().codePointAt(offset); + + pointCodes[count] = String.format("&#%d;", codePoint); + pointCodesHex[count++] = String.format("&#x%x;", codePoint); + + offset += Character.charCount(codePoint); + } + this.htmlDec = stringJoin(pointCodes, count); + this.htmlHex = stringJoin(pointCodesHex, count); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + } + + /** + * Method to replace String.join, since it was only introduced in java8 + * @param array the array to be concatenated + * @return concatenated String + */ + private String stringJoin(String[] array, int count){ + String joined = ""; + for(int i = 0; i < count; i++) 
+ joined += array[i]; + return joined; + } + + /** + * Returns the description of the emoji + * + * @return the description + */ + public String getDescription() { + return this.description; + } + + /** + * Returns wether the emoji supports the Fitzpatrick modifiers or not + * + * @return true if the emoji supports the Fitzpatrick modifiers + */ + public boolean supportsFitzpatrick() { + return this.supportsFitzpatrick; + } + + /** + * Returns the aliases of the emoji + * + * @return the aliases (unmodifiable) + */ + public List getAliases() { + return this.aliases; + } + + /** + * Returns the tags of the emoji + * + * @return the tags (unmodifiable) + */ + public List getTags() { + return this.tags; + } + + /** + * Returns the unicode representation of the emoji + * + * @return the unicode representation + */ + public String getUnicode() { + return this.unicode; + } + + /** + * Returns the unicode representation of the emoji associated with the + * provided Fitzpatrick modifier.
+ * If the modifier is null, then the result is similar to + * {@link Emoji#getUnicode()} + * + * @param fitzpatrick the fitzpatrick modifier or null + * + * @return the unicode representation + * @throws UnsupportedOperationException if the emoji doesn't support the + * Fitzpatrick modifiers + */ + public String getUnicode(Fitzpatrick fitzpatrick) { + if (!this.supportsFitzpatrick()) { + throwUnsupportedOperationExceptionForFitzpatrick(); + } else if (fitzpatrick == null) { + return this.getUnicode(); + } + return this.getUnicode() + fitzpatrick.unicode; + } + + private void throwUnsupportedOperationExceptionForFitzpatrick() { + throw new UnsupportedOperationException( + "Cannot get the unicode with a fitzpatrick modifier, " + + "the emoji doesn't support fitzpatrick." + ); + } + +/* public String getUnicode(Fitzpatrick fitzpatrick) { + if (!this.supportsFitzpatrick()) { + throw new UnsupportedOperationException( + "Cannot get the unicode with a fitzpatrick modifier, " + + "the emoji doesn't support fitzpatrick." + ); + } else if (fitzpatrick == null) { + return this.getUnicode(); + } + return this.getUnicode() + fitzpatrick.unicode; + }*/ + + /** + * Returns the HTML decimal representation of the emoji + * + * @return the HTML decimal representation + */ + public String getHtmlDecimal() { + return this.htmlDec; + } + + /** + * @deprecated identical to {@link #getHtmlHexadecimal()} for + * backwards-compatibility. Use that instead. 
+ * + * @return the HTML hexadecimal representation + */ + public String getHtmlHexidecimal() { + return this.getHtmlHexadecimal(); + } + + /** + * Returns the HTML hexadecimal representation of the emoji + * + * @return the HTML hexadecimal representation + */ + public String getHtmlHexadecimal() { + return this.htmlHex; + } + + @Override + public boolean equals(Object other) { + if (this == other) return true; + if (other == null || getClass() != other.getClass()) return false; + + Emoji otherEmoji = (Emoji) other; + String thisUnicode = getUnicode(); + String otherUnicode = otherEmoji.getUnicode(); + + return thisUnicode.equals(otherUnicode); + } + + @Override + public int hashCode() { + return unicode.hashCode(); + } + + /** + * Returns the String representation of the Emoji object.
+ *
+ * Example:
+ * Emoji { + * description='smiling face with open mouth and smiling eyes', + * supportsFitzpatrick=false, + * aliases=[smile], + * tags=[happy, joy, pleased], + * unicode='šŸ˜„', + * htmlDec='&#128516;', + * htmlHex='&#x1f604;' + * } + * + * @return the string representation + */ + + + @Override + public String toString() { + return EmojiFormatter.formatToString(this); + } +} diff --git a/src/main/java/com/vdurmont/emoji/EmojiFormatter.java b/src/main/java/com/vdurmont/emoji/EmojiFormatter.java new file mode 100644 index 00000000..d81386d7 --- /dev/null +++ b/src/main/java/com/vdurmont/emoji/EmojiFormatter.java @@ -0,0 +1,16 @@ +package com.vdurmont.emoji; + +public class EmojiFormatter { + public static String formatToString(Emoji emoji) { + return "Emoji{" + + "description='" + emoji.getDescription() + '\'' + + ", supportsFitzpatrick=" + emoji.supportsFitzpatrick() + + ", aliases=" + emoji.getAliases() + + ", tags=" + emoji.getTags() + + ", unicode='" + emoji.getUnicode() + '\'' + + ", htmlDec='" + emoji.getHtmlDecimal() + '\'' + + ", htmlHex='" + emoji.getHtmlHexadecimal() + '\'' + + '}'; + + } +} diff --git a/src/main/java/com/vdurmont/emoji/EmojiLoader.java b/src/main/java/com/vdurmont/emoji/EmojiLoader.java index 9658e887..a56e33ba 100644 --- a/src/main/java/com/vdurmont/emoji/EmojiLoader.java +++ b/src/main/java/com/vdurmont/emoji/EmojiLoader.java @@ -1,90 +1,90 @@ -package com.vdurmont.emoji; - -import org.json.JSONArray; -import org.json.JSONObject; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.List; -import java.util.Scanner; - -/** - * Loads the emojis from a JSON database. - * - * @author Vincent DURMONT [vdurmont@gmail.com] - */ -public class EmojiLoader { - /** - * No need for a constructor, all the methods are static. 
- */ - private EmojiLoader() {} - - /** - * Loads a JSONArray of emojis from an InputStream, parses it and returns the - * associated list of {@link com.vdurmont.emoji.Emoji}s - * - * @param stream the stream of the JSONArray - * - * @return the list of {@link com.vdurmont.emoji.Emoji}s - * @throws IOException if an error occurs while reading the stream or parsing - * the JSONArray - */ - public static List loadEmojis(InputStream stream) throws IOException { - JSONArray emojisJSON = new JSONArray(inputStreamToString(stream)); - List emojis = new ArrayList(emojisJSON.length()); - for (int i = 0; i < emojisJSON.length(); i++) { - Emoji emoji = buildEmojiFromJSON(emojisJSON.getJSONObject(i)); - if (emoji != null) { - emojis.add(emoji); - } - } - return emojis; - } - - private static String inputStreamToString( - InputStream stream - ) throws IOException { - StringBuilder sb = new StringBuilder(); - InputStreamReader isr = new InputStreamReader(stream, "UTF-8"); - BufferedReader br = new BufferedReader(isr); - String read; - while((read = br.readLine()) != null) { - sb.append(read); - } - br.close(); - return sb.toString(); - } - - protected static Emoji buildEmojiFromJSON( - JSONObject json - ) throws UnsupportedEncodingException { - if (!json.has("emoji")) { - return null; - } - - byte[] bytes = json.getString("emoji").getBytes("UTF-8"); - String description = null; - if (json.has("description")) { - description = json.getString("description"); - } - boolean supportsFitzpatrick = false; - if (json.has("supports_fitzpatrick")) { - supportsFitzpatrick = json.getBoolean("supports_fitzpatrick"); - } - List aliases = jsonArrayToStringList(json.getJSONArray("aliases")); - List tags = jsonArrayToStringList(json.getJSONArray("tags")); - return new Emoji(description, supportsFitzpatrick, aliases, tags, bytes); - } - - private static List jsonArrayToStringList(JSONArray array) { - List strings = new ArrayList(array.length()); - for (int i = 0; i < array.length(); i++) { - 
strings.add(array.getString(i)); - } - return strings; - } -} +package com.vdurmont.emoji; + +import org.json.JSONArray; +import org.json.JSONObject; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.List; +import java.util.Scanner; + +/** + * Loads the emojis from a JSON database. + * + * @author Vincent DURMONT [vdurmont@gmail.com] + */ +public class EmojiLoader { + /** + * No need for a constructor, all the methods are static. + */ + private EmojiLoader() {} + + /** + * Loads a JSONArray of emojis from an InputStream, parses it and returns the + * associated list of {@link com.vdurmont.emoji.Emoji}s + * + * @param stream the stream of the JSONArray + * + * @return the list of {@link com.vdurmont.emoji.Emoji}s + * @throws IOException if an error occurs while reading the stream or parsing + * the JSONArray + */ + public static List loadEmojis(InputStream stream) throws IOException { + JSONArray emojisJSON = new JSONArray(inputStreamToString(stream)); + List emojis = new ArrayList(emojisJSON.length()); + for (int i = 0; i < emojisJSON.length(); i++) { + Emoji emoji = buildEmojiFromJSON(emojisJSON.getJSONObject(i)); + if (emoji != null) { + emojis.add(emoji); + } + } + return emojis; + } + + private static String inputStreamToString( + InputStream stream + ) throws IOException { + StringBuilder sb = new StringBuilder(); + InputStreamReader isr = new InputStreamReader(stream, "UTF-8"); + BufferedReader br = new BufferedReader(isr); + String read; + while((read = br.readLine()) != null) { + sb.append(read); + } + br.close(); + return sb.toString(); + } + + protected static Emoji buildEmojiFromJSON( + JSONObject json + ) throws UnsupportedEncodingException { + if (!json.has("emoji")) { + return null; + } + + byte[] bytes = json.getString("emoji").getBytes("UTF-8"); + String description = null; + if 
(json.has("description")) { + description = json.getString("description"); + } + boolean supportsFitzpatrick = false; + if (json.has("supports_fitzpatrick")) { + supportsFitzpatrick = json.getBoolean("supports_fitzpatrick"); + } + List aliases = jsonArrayToStringList(json.getJSONArray("aliases")); + List tags = jsonArrayToStringList(json.getJSONArray("tags")); + return new Emoji(description, supportsFitzpatrick, aliases, tags, bytes); + } + + private static List jsonArrayToStringList(JSONArray array) { + List strings = new ArrayList(array.length()); + for (int i = 0; i < array.length(); i++) { + strings.add(array.getString(i)); + } + return strings; + } +} diff --git a/src/main/java/com/vdurmont/emoji/EmojiManager.java b/src/main/java/com/vdurmont/emoji/EmojiManager.java index 7c3ef729..39ab3d1a 100644 --- a/src/main/java/com/vdurmont/emoji/EmojiManager.java +++ b/src/main/java/com/vdurmont/emoji/EmojiManager.java @@ -1,193 +1,217 @@ -package com.vdurmont.emoji; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -/** - * Holds the loaded emojis and provides search functions. 
- * - * @author Vincent DURMONT [vdurmont@gmail.com] - */ -public class EmojiManager { - private static final String PATH = "/emojis.json"; - private static final Map EMOJIS_BY_ALIAS = - new HashMap(); - private static final Map> EMOJIS_BY_TAG = - new HashMap>(); - private static final List ALL_EMOJIS; - static final EmojiTrie EMOJI_TRIE; - - static { - try { - InputStream stream = EmojiLoader.class.getResourceAsStream(PATH); - List emojis = EmojiLoader.loadEmojis(stream); - ALL_EMOJIS = emojis; - for (Emoji emoji : emojis) { - for (String tag : emoji.getTags()) { - if (EMOJIS_BY_TAG.get(tag) == null) { - EMOJIS_BY_TAG.put(tag, new HashSet()); - } - EMOJIS_BY_TAG.get(tag).add(emoji); - } - for (String alias : emoji.getAliases()) { - EMOJIS_BY_ALIAS.put(alias, emoji); - } - } - - EMOJI_TRIE = new EmojiTrie(emojis); - Collections.sort(ALL_EMOJIS, new Comparator() { - public int compare(Emoji e1, Emoji e2) { - return e2.getUnicode().length() - e1.getUnicode().length(); - } - }); - stream.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - /** - * No need for a constructor, all the methods are static. - */ - private EmojiManager() {} - - /** - * Returns all the {@link com.vdurmont.emoji.Emoji}s for a given tag. - * - * @param tag the tag - * - * @return the associated {@link com.vdurmont.emoji.Emoji}s, null if the tag - * is unknown - */ - public static Set getForTag(String tag) { - if (tag == null) { - return null; - } - return EMOJIS_BY_TAG.get(tag); - } - - /** - * Returns the {@link com.vdurmont.emoji.Emoji} for a given alias. 
- * - * @param alias the alias - * - * @return the associated {@link com.vdurmont.emoji.Emoji}, null if the alias - * is unknown - */ - public static Emoji getForAlias(String alias) { - if (alias == null || alias.isEmpty()) { - return null; - } - return EMOJIS_BY_ALIAS.get(trimAlias(alias)); - } - - private static String trimAlias(String alias) { - int len = alias.length(); - return alias.substring( - alias.charAt(0) == ':' ? 1 : 0, - alias.charAt(len - 1) == ':' ? len - 1 : len); - } - - - /** - * Returns the {@link com.vdurmont.emoji.Emoji} for a given unicode. - * - * @param unicode the the unicode - * - * @return the associated {@link com.vdurmont.emoji.Emoji}, null if the - * unicode is unknown - */ - public static Emoji getByUnicode(String unicode) { - if (unicode == null) { - return null; - } - return EMOJI_TRIE.getEmoji(unicode); - } - - /** - * Returns all the {@link com.vdurmont.emoji.Emoji}s - * - * @return all the {@link com.vdurmont.emoji.Emoji}s - */ - public static Collection getAll() { - return ALL_EMOJIS; - } - - /** - * Tests if a given String is an emoji. - * - * @param string the string to test - * - * @return true if the string is an emoji's unicode, false else - */ - public static boolean isEmoji(String string) { - if (string == null) return false; - - EmojiParser.UnicodeCandidate unicodeCandidate = EmojiParser.getNextUnicodeCandidate(string.toCharArray(), 0); - return unicodeCandidate != null && - unicodeCandidate.getEmojiStartIndex() == 0 && - unicodeCandidate.getFitzpatrickEndIndex() == string.length(); - } - - /** - * Tests if a given String contains an emoji. - * - * @param string the string to test - * - * @return true if the string contains an emoji's unicode, false otherwise - */ - public static boolean containsEmoji(String string) { - if (string == null) return false; - - return EmojiParser.getNextUnicodeCandidate(string.toCharArray(), 0) != null; - } - - /** - * Tests if a given String only contains emojis. 
- * - * @param string the string to test - * - * @return true if the string only contains emojis, false else - */ - public static boolean isOnlyEmojis(String string) { - return string != null && EmojiParser.removeAllEmojis(string).isEmpty(); - } - - /** - * Checks if sequence of chars contain an emoji. - * @param sequence Sequence of char that may contain emoji in full or - * partially. - * - * @return - * <li> - * Matches.EXACTLY if char sequence in its entirety is an emoji - * </li> - * <li> - * Matches.POSSIBLY if char sequence matches prefix of an emoji - * </li> - * <li> - * Matches.IMPOSSIBLE if char sequence matches no emoji or prefix of an - * emoji - * </li> - */ - public static EmojiTrie.Matches isEmoji(char[] sequence) { - return EMOJI_TRIE.isEmoji(sequence); - } - - /** - * Returns all the tags in the database - * - * @return the tags - */ - public static Collection getAllTags() { - return EMOJIS_BY_TAG.keySet(); - } -} +package com.vdurmont.emoji; + +import com.vdurmont.emoji.validator.NonNullAndEmptyValidator; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Holds the loaded emojis and provides search functions. 
+ * + * @author Vincent DURMONT [vdurmont@gmail.com] + */ +public class EmojiManager { + private static final String PATH = "/emojis.json"; + private static final Map EMOJIS_BY_ALIAS = + new HashMap(); + private static final Map> EMOJIS_BY_TAG = + new HashMap>(); + private static final List ALL_EMOJIS; + static final EmojiTrie EMOJI_TRIE; + + static { + try { + InputStream stream = EmojiLoader.class.getResourceAsStream(PATH); + List emojis = EmojiLoader.loadEmojis(stream); + ALL_EMOJIS = emojis; + for (Emoji emoji : emojis) { + for (String tag : emoji.getTags()) { + if (EMOJIS_BY_TAG.get(tag) == null) { + EMOJIS_BY_TAG.put(tag, new HashSet()); + } + EMOJIS_BY_TAG.get(tag).add(emoji); + } + for (String alias : emoji.getAliases()) { + EMOJIS_BY_ALIAS.put(alias, emoji); + } + } + + EMOJI_TRIE = new EmojiTrie(emojis); + Collections.sort(ALL_EMOJIS, new Comparator() { + public int compare(Emoji e1, Emoji e2) { + return e2.getUnicode().length() - e1.getUnicode().length(); + } + }); + stream.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * No need for a constructor, all the methods are static. + */ + private EmojiManager() {} + + /** + * Returns all the {@link com.vdurmont.emoji.Emoji}s for a given tag. + * + * @param tag the tag + * + * @return the associated {@link com.vdurmont.emoji.Emoji}s, null if the tag + * is unknown + */ + public static Set getForTag(String tag) { + if (tag == null) { + return null; + } + return EMOJIS_BY_TAG.get(tag); + } + + /** + * Returns the {@link com.vdurmont.emoji.Emoji} for a given alias. 
+ * + * @param alias the alias + * + * @return the associated {@link com.vdurmont.emoji.Emoji}, null if the alias + * is unknown + */ + public static Emoji getForAlias(String alias) { + if (alias == null || alias.isEmpty()) { + return null; + } + return EMOJIS_BY_ALIAS.get(trimAlias(alias)); + } + + private static String trimAlias(String alias) { + int len = alias.length(); + return alias.substring( + alias.charAt(0) == ':' ? 1 : 0, + alias.charAt(len - 1) == ':' ? len - 1 : len); + } + + + /** + * Returns the {@link com.vdurmont.emoji.Emoji} for a given unicode. + * + * @param unicode the the unicode + * + * @return the associated {@link com.vdurmont.emoji.Emoji}, null if the + * unicode is unknown + */ + public static Emoji getByUnicode(String unicode) { + if (unicode == null) { + return null; + } + return EMOJI_TRIE.getEmoji(unicode); + } + + /** + * Returns all the {@link com.vdurmont.emoji.Emoji}s + * + * @return all the {@link com.vdurmont.emoji.Emoji}s + */ + public static Collection getAll() { + return ALL_EMOJIS; + } + + /** + * Tests if a given String is an emoji. 
+ * + * @param string the string to test + * + * @return true if the string is an emoji's unicode, false else + */ + public static boolean isEmoji(String string) { + if (string == null) return false; + + // Get the next Unicode candidate from the string + EmojiParser.UnicodeCandidate unicodeCandidate = EmojiParser.getNextUnicodeCandidate(string.toCharArray(), 0); + + // Check if the unicodeCandidate is not null + boolean unicodeCandidateNotNull = unicodeCandidate != null; + + // Check if the emoji starts at index 0 + boolean emojiStartsAtIndexZero = unicodeCandidateNotNull && unicodeCandidate.getEmojiStartIndex() == 0; + + // Check if the Fitzpatrick end index of the unicodeCandidate matches the length of the string + boolean fitsStringLength = unicodeCandidateNotNull && unicodeCandidate.getFitzpatrickEndIndex() == string.length(); + + // Return true only if all conditions are met + return unicodeCandidateNotNull && emojiStartsAtIndexZero && fitsStringLength; + } + + /*old version + * public static boolean isEmoji(String string) { + if (string == null) return false; + + EmojiParser.UnicodeCandidate unicodeCandidate = EmojiParser.getNextUnicodeCandidate(string.toCharArray(), 0); + return unicodeCandidate != null && + unicodeCandidate.getEmojiStartIndex() == 0 && + unicodeCandidate.getFitzpatrickEndIndex() == string.length(); + } + * + * */ + + /** + * Tests if a given String contains an emoji. + * + * @param string the string to test + * + * @return true if the string contains an emoji's unicode, false otherwise + */ + public static boolean containsEmoji(String string) { + if (string == null) return false; + + return EmojiParser.getNextUnicodeCandidate(string.toCharArray(), 0) != null; + } + + /** + * Tests if a given String only contains emojis. 
+ * + * @param string the string to test + * + * @return true if the string only contains emojis, false else + */ + public static boolean isOnlyEmojis(String string) { + return (new NonNullAndEmptyValidator()).isValid(string); + } + + /** + * Checks if sequence of chars contain an emoji. + * @param sequence Sequence of char that may contain emoji in full or + * partially. + * + * @return + * <li> + * Matches.EXACTLY if char sequence in its entirety is an emoji + * </li> + * <li> + * Matches.POSSIBLY if char sequence matches prefix of an emoji + * </li> + * <li> + * Matches.IMPOSSIBLE if char sequence matches no emoji or prefix of an + * emoji + * </li> + */ + public static EmojiTrie.Matches isEmoji(char[] sequence) { + return EMOJI_TRIE.isEmoji(sequence); + } + + /** + * Returns all the tags in the database + * + * @return the tags + */ + public static Collection getAllTags() { + return EMOJIS_BY_TAG.keySet(); + } +} diff --git a/src/main/java/com/vdurmont/emoji/EmojiParser.java b/src/main/java/com/vdurmont/emoji/EmojiParser.java index b6294a47..27b5c207 100644 --- a/src/main/java/com/vdurmont/emoji/EmojiParser.java +++ b/src/main/java/com/vdurmont/emoji/EmojiParser.java @@ -1,576 +1,575 @@ -package com.vdurmont.emoji; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - -/** - * Provides methods to parse strings with emojis. - * - * @author Vincent DURMONT [vdurmont@gmail.com] - */ -public class EmojiParser { - - /** - * See {@link #parseToAliases(String, FitzpatrickAction)} with the action - * "PARSE" - * - * @param input the string to parse - * - * @return the string with the emojis replaced by their alias. - */ - public static String parseToAliases(String input) { - return parseToAliases(input, FitzpatrickAction.PARSE); - } - - /** - * Replaces the emoji's unicode occurrences by one of their alias - * (between 2 ':').
- * Example: 😄 will be replaced by :smile:
- *
- * When a fitzpatrick modifier is present with a PARSE action, a "|" will be - * appendend to the alias, with the fitzpatrick type.
- * Example: 👦🏿 will be replaced by - * :boy|type_6:
- * The fitzpatrick types are: type_1_2, type_3, type_4, type_5, type_6
- *
- * When a fitzpatrick modifier is present with a REMOVE action, the modifier - * will be deleted.
- * Example: 👦🏿 will be replaced by :boy:
- *
- * When a fitzpatrick modifier is present with a IGNORE action, the modifier - * will be ignored.
- * Example: 👦🏿 will be replaced by :boy:🏿
- * - * @param input the string to parse - * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers - * - * @return the string with the emojis replaced by their alias. - */ - public static String parseToAliases( - String input, - final FitzpatrickAction fitzpatrickAction - ) { - EmojiTransformer emojiTransformer = new EmojiTransformer() { - public String transform(UnicodeCandidate unicodeCandidate) { - switch (fitzpatrickAction) { - default: - case PARSE: - if (unicodeCandidate.hasFitzpatrick()) { - return ":" + - unicodeCandidate.getEmoji().getAliases().get(0) + - "|" + - unicodeCandidate.getFitzpatrickType() + - ":"; - } - case REMOVE: - return ":" + - unicodeCandidate.getEmoji().getAliases().get(0) + - ":"; - case IGNORE: - return ":" + - unicodeCandidate.getEmoji().getAliases().get(0) + - ":" + - unicodeCandidate.getFitzpatrickUnicode(); - } - } - }; - - return parseFromUnicode(input, emojiTransformer); - } - - /** - * Replace all emojis with character - * - * @param str the string to process - * @param replacementString replacement the string that will replace all the emojis - * @return the string with replaced character - */ - public static String replaceAllEmojis(String str, final String replacementString) { - EmojiParser.EmojiTransformer emojiTransformer = new EmojiParser.EmojiTransformer() { - public String transform(EmojiParser.UnicodeCandidate unicodeCandidate) { - return replacementString; - } - }; - - return parseFromUnicode(str, emojiTransformer); - } - - - /** - * Replaces the emoji's aliases (between 2 ':') occurrences and the html - * representations by their unicode.
- * Examples:
- * :smile: will be replaced by 😄
- * &#128516; will be replaced by 😄
- * :boy|type_6: will be replaced by šŸ‘¦šŸæ - * - * @param input the string to parse - * - * @return the string with the aliases and html representations replaced by - * their unicode. - */ - public static String parseToUnicode(String input) { - StringBuilder sb = new StringBuilder(input.length()); - - for (int last = 0; last < input.length(); last++) { - AliasCandidate alias = getAliasAt(input, last); - if (alias == null) { - alias = getHtmlEncodedEmojiAt(input, last); - } - - if (alias != null) { - sb.append(alias.emoji.getUnicode()); - last = alias.endIndex; - - if (alias.fitzpatrick != null) { - sb.append(alias.fitzpatrick.unicode); - } - } else { - sb.append(input.charAt(last)); - } - } - - return sb.toString(); - } - - /** Finds the alias in the given string starting at the given point, null otherwise */ - protected static AliasCandidate getAliasAt(String input, int start) { - if (input.length() < start + 2 || input.charAt(start) != ':') return null; // Aliases start with : - int aliasEnd = input.indexOf(':', start + 2); // Alias must be at least 1 char in length - if (aliasEnd == -1) return null; // No alias end found - - int fitzpatrickStart = input.indexOf('|', start + 2); - if (fitzpatrickStart != -1 && fitzpatrickStart < aliasEnd) { - Emoji emoji = EmojiManager.getForAlias(input.substring(start, fitzpatrickStart)); - if (emoji == null) return null; // Not a valid alias - if (!emoji.supportsFitzpatrick()) return null; // Fitzpatrick was specified, but the emoji does not support it - Fitzpatrick fitzpatrick = Fitzpatrick.fitzpatrickFromType(input.substring(fitzpatrickStart + 1, aliasEnd)); - return new AliasCandidate(emoji, fitzpatrick, start, aliasEnd); - } - - Emoji emoji = EmojiManager.getForAlias(input.substring(start, aliasEnd)); - if (emoji == null) return null; // Not a valid alias - return new AliasCandidate(emoji, null, start, aliasEnd); - } - - /** Finds the HTML encoded emoji in the given string starting at the given point, null otherwise */ - 
protected static AliasCandidate getHtmlEncodedEmojiAt(String input, int start) { - if (input.length() < start + 4 || input.charAt(start) != '&' || input.charAt(start + 1) != '#') return null; - - Emoji longestEmoji = null; - int longestCodePointEnd = -1; - char[] chars = new char[EmojiManager.EMOJI_TRIE.maxDepth]; - int charsIndex = 0; - int codePointStart = start; - do { - int codePointEnd = input.indexOf(';', codePointStart + 3); // Code point must be at least 1 char in length - if (codePointEnd == -1) break; - - try { - int radix = input.charAt(codePointStart + 2) == 'x' ? 16 : 10; - int codePoint = Integer.parseInt(input.substring(codePointStart + 2 + radix / 16, codePointEnd), radix); - charsIndex += Character.toChars(codePoint, chars, charsIndex); - } catch (NumberFormatException e) { - break; - } catch (IllegalArgumentException e) { - break; - } - Emoji foundEmoji = EmojiManager.EMOJI_TRIE.getEmoji(chars, 0, charsIndex); - if (foundEmoji != null) { - longestEmoji = foundEmoji; - longestCodePointEnd = codePointEnd; - } - codePointStart = codePointEnd + 1; - } while (input.length() > codePointStart + 4 && - input.charAt(codePointStart) == '&' && - input.charAt(codePointStart + 1) == '#' && - charsIndex < chars.length && - !EmojiManager.EMOJI_TRIE.isEmoji(chars, 0, charsIndex).impossibleMatch()); - - if (longestEmoji == null) return null; - return new AliasCandidate(longestEmoji, null, start, longestCodePointEnd); - } - - /** - * See {@link #parseToHtmlDecimal(String, FitzpatrickAction)} with the action - * "PARSE" - * - * @param input the string to parse - * - * @return the string with the emojis replaced by their html decimal - * representation. - */ - public static String parseToHtmlDecimal(String input) { - return parseToHtmlDecimal(input, FitzpatrickAction.PARSE); - } - - /** - * Replaces the emoji's unicode occurrences by their html representation.
- * Example: 😄 will be replaced by &#128516;
- *
- * When a fitzpatrick modifier is present with a PARSE or REMOVE action, the - * modifier will be deleted from the string.
- * Example: 👦🏿 will be replaced by - * &#128102;
- *
- * When a fitzpatrick modifier is present with a IGNORE action, the modifier - * will be ignored and will remain in the string.
- * Example: šŸ‘¦šŸæ will be replaced by - * &#128102;šŸæ - * - * @param input the string to parse - * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers - * - * @return the string with the emojis replaced by their html decimal - * representation. - */ - public static String parseToHtmlDecimal( - String input, - final FitzpatrickAction fitzpatrickAction - ) { - EmojiTransformer emojiTransformer = new EmojiTransformer() { - public String transform(UnicodeCandidate unicodeCandidate) { - switch (fitzpatrickAction) { - default: - case PARSE: - case REMOVE: - return unicodeCandidate.getEmoji().getHtmlDecimal(); - case IGNORE: - return unicodeCandidate.getEmoji().getHtmlDecimal() + - unicodeCandidate.getFitzpatrickUnicode(); - } - } - }; - - return parseFromUnicode(input, emojiTransformer); - } - - /** - * See {@link #parseToHtmlHexadecimal(String, FitzpatrickAction)} with the - * action "PARSE" - * - * @param input the string to parse - * - * @return the string with the emojis replaced by their html hex - * representation. - */ - public static String parseToHtmlHexadecimal(String input) { - return parseToHtmlHexadecimal(input, FitzpatrickAction.PARSE); - } - - /** - * Replaces the emoji's unicode occurrences by their html hex - * representation.
- * Example: 👦 will be replaced by &#x1f466;
- *
- * When a fitzpatrick modifier is present with a PARSE or REMOVE action, the - * modifier will be deleted.
- * Example: 👦🏿 will be replaced by - * &#x1f466;
- *
- * When a fitzpatrick modifier is present with a IGNORE action, the modifier - * will be ignored and will remain in the string.
- * Example: šŸ‘¦šŸæ will be replaced by - * &#x1f466;šŸæ - * - * @param input the string to parse - * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers - * - * @return the string with the emojis replaced by their html hex - * representation. - */ - public static String parseToHtmlHexadecimal( - String input, - final FitzpatrickAction fitzpatrickAction - ) { - EmojiTransformer emojiTransformer = new EmojiTransformer() { - public String transform(UnicodeCandidate unicodeCandidate) { - switch (fitzpatrickAction) { - default: - case PARSE: - case REMOVE: - return unicodeCandidate.getEmoji().getHtmlHexadecimal(); - case IGNORE: - return unicodeCandidate.getEmoji().getHtmlHexadecimal() + - unicodeCandidate.getFitzpatrickUnicode(); - } - } - }; - - return parseFromUnicode(input, emojiTransformer); - } - - /** - * Removes all emojis from a String - * - * @param str the string to process - * - * @return the string without any emoji - */ - public static String removeAllEmojis(String str) { - EmojiTransformer emojiTransformer = new EmojiTransformer() { - public String transform(UnicodeCandidate unicodeCandidate) { - return ""; - } - }; - - return parseFromUnicode(str, emojiTransformer); - } - - - /** - * Removes a set of emojis from a String - * - * @param str the string to process - * @param emojisToRemove the emojis to remove from this string - * - * @return the string without the emojis that were removed - */ - public static String removeEmojis( - String str, - final Collection emojisToRemove - ) { - EmojiTransformer emojiTransformer = new EmojiTransformer() { - public String transform(UnicodeCandidate unicodeCandidate) { - if (!emojisToRemove.contains(unicodeCandidate.getEmoji())) { - return unicodeCandidate.getEmoji().getUnicode() + - unicodeCandidate.getFitzpatrickUnicode(); - } - return ""; - } - }; - - return parseFromUnicode(str, emojiTransformer); - } - - /** - * Removes all the emojis in a String except a provided set - * - * @param str 
the string to process - * @param emojisToKeep the emojis to keep in this string - * - * @return the string without the emojis that were removed - */ - public static String removeAllEmojisExcept( - String str, - final Collection emojisToKeep - ) { - EmojiTransformer emojiTransformer = new EmojiTransformer() { - public String transform(UnicodeCandidate unicodeCandidate) { - if (emojisToKeep.contains(unicodeCandidate.getEmoji())) { - return unicodeCandidate.getEmoji().getUnicode() + - unicodeCandidate.getFitzpatrickUnicode(); - } - return ""; - } - }; - - return parseFromUnicode(str, emojiTransformer); - } - - - /** - * Detects all unicode emojis in input string and replaces them with the - * return value of transformer.transform() - * - * @param input the string to process - * @param transformer emoji transformer to apply to each emoji - * - * @return input string with all emojis transformed - */ - public static String parseFromUnicode( - String input, - EmojiTransformer transformer - ) { - int prev = 0; - StringBuilder sb = new StringBuilder(input.length()); - List replacements = getUnicodeCandidates(input); - for (UnicodeCandidate candidate : replacements) { - sb.append(input, prev, candidate.getEmojiStartIndex()); - - sb.append(transformer.transform(candidate)); - prev = candidate.getFitzpatrickEndIndex(); - } - - return sb.append(input.substring(prev)).toString(); - } - - public static List extractEmojis(String input) { - List emojis = getUnicodeCandidates(input); - List result = new ArrayList(); - for (UnicodeCandidate emoji : emojis) { - if (emoji.getEmoji().supportsFitzpatrick() && emoji.hasFitzpatrick()) { - result.add(emoji.getEmoji().getUnicode(emoji.getFitzpatrick())); - } else { - result.add(emoji.getEmoji().getUnicode()); - } - } - return result; - } - - - /** - * Generates a list UnicodeCandidates found in input string. 
A - * UnicodeCandidate is created for every unicode emoticon found in input - * string, additionally if Fitzpatrick modifier follows the emoji, it is - * included in UnicodeCandidate. Finally, it contains start and end index of - * unicode emoji itself (WITHOUT Fitzpatrick modifier whether it is there or - * not!). - * - * @param input String to find all unicode emojis in - * @return List of UnicodeCandidates for each unicode emote in text - */ - protected static List getUnicodeCandidates(String input) { - char[] inputCharArray = input.toCharArray(); - List candidates = new ArrayList(); - UnicodeCandidate next; - for (int i = 0; (next = getNextUnicodeCandidate(inputCharArray, i)) != null; i = next.getFitzpatrickEndIndex()) { - candidates.add(next); - } - - return candidates; - } - - /** - * Finds the next UnicodeCandidate after a given starting index - * - * @param chars char array to find UnicodeCandidate in - * @param start starting index for search - * @return the next UnicodeCandidate or null if no UnicodeCandidate is found after start index - */ - protected static UnicodeCandidate getNextUnicodeCandidate(char[] chars, int start) { - for (int i = start; i < chars.length; i++) { - int emojiEnd = getEmojiEndPos(chars, i); - - if (emojiEnd != -1) { - Emoji emoji = EmojiManager.getByUnicode(new String(chars, i, emojiEnd - i)); - String fitzpatrickString = (emojiEnd + 2 <= chars.length) ? - new String(chars, emojiEnd, 2) : - null; - return new UnicodeCandidate( - emoji, - fitzpatrickString, - i - ); - } - } - - return null; - } - - - /** - * Returns end index of a unicode emoji if it is found in text starting at - * index startPos, -1 if not found. 
- * This returns the longest matching emoji, for example, in - * "\uD83D\uDC68\u200D\uD83D\uDC69\u200D\uD83D\uDC66" - * it will find alias:family_man_woman_boy, NOT alias:man - * - * @param text the current text where we are looking for an emoji - * @param startPos the position in the text where we should start looking for - * an emoji end - * - * @return the end index of the unicode emoji starting at startPos. -1 if not - * found - */ - protected static int getEmojiEndPos(char[] text, int startPos) { - int best = -1; - for (int j = startPos + 1; j <= text.length; j++) { - EmojiTrie.Matches status = EmojiManager.EMOJI_TRIE.isEmoji(text, startPos, j); - - if (status.exactMatch()) { - best = j; - } else if (status.impossibleMatch()) { - return best; - } - } - - return best; - } - - - public static class UnicodeCandidate { - private final Emoji emoji; - private final Fitzpatrick fitzpatrick; - private final int startIndex; - - private UnicodeCandidate(Emoji emoji, String fitzpatrick, int startIndex) { - this.emoji = emoji; - this.fitzpatrick = Fitzpatrick.fitzpatrickFromUnicode(fitzpatrick); - this.startIndex = startIndex; - } - - public Emoji getEmoji() { - return emoji; - } - - public boolean hasFitzpatrick() { - return getFitzpatrick() != null; - } - - public Fitzpatrick getFitzpatrick() { - return fitzpatrick; - } - - public String getFitzpatrickType() { - return hasFitzpatrick() ? fitzpatrick.name().toLowerCase() : ""; - } - - public String getFitzpatrickUnicode() { - return hasFitzpatrick() ? fitzpatrick.unicode : ""; - } - - public int getEmojiStartIndex() { - return startIndex; - } - - public int getEmojiEndIndex() { - return startIndex + emoji.getUnicode().length(); - } - - public int getFitzpatrickEndIndex() { - return getEmojiEndIndex() + (fitzpatrick != null ? 
2 : 0); - } - } - - - protected static class AliasCandidate { - public final Emoji emoji; - public final Fitzpatrick fitzpatrick; - public final int startIndex; - public final int endIndex; - - private AliasCandidate(Emoji emoji, Fitzpatrick fitzpatrick, int startIndex, int endIndex) { - this.emoji = emoji; - this.fitzpatrick = fitzpatrick; - this.startIndex = startIndex; - this.endIndex = endIndex; - } - } - - /** - * Enum used to indicate what should be done when a Fitzpatrick modifier is - * found. - */ - public enum FitzpatrickAction { - /** - * Tries to match the Fitzpatrick modifier with the previous emoji - */ - PARSE, - - /** - * Removes the Fitzpatrick modifier from the string - */ - REMOVE, - - /** - * Ignores the Fitzpatrick modifier (it will stay in the string) - */ - IGNORE - } - - public interface EmojiTransformer { - String transform(UnicodeCandidate unicodeCandidate); - } -} +package com.vdurmont.emoji; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +/** + * Provides methods to parse strings with emojis. + * + * @author Vincent DURMONT [vdurmont@gmail.com] + */ +public class EmojiParser { + + /** + * See {@link #parseToAliases(String, FitzpatrickAction)} with the action + * "PARSE" + * + * @param input the string to parse + * + * @return the string with the emojis replaced by their alias. + */ + public static String parseToAliases(String input) { + return parseToAliases(input, FitzpatrickAction.PARSE); + } + + /** + * Replaces the emoji's unicode occurrences by one of their alias + * (between 2 ':').
+ * Example: 😄 will be replaced by :smile:
+ *
+ * When a fitzpatrick modifier is present with a PARSE action, a "|" will be + * appended to the alias, with the fitzpatrick type.
+ * Example: 👦🏿 will be replaced by + * :boy|type_6:
+ * The fitzpatrick types are: type_1_2, type_3, type_4, type_5, type_6
+ *
+ * When a fitzpatrick modifier is present with a REMOVE action, the modifier + * will be deleted.
+ * Example: 👦🏿 will be replaced by :boy:
+ *
+ * When a fitzpatrick modifier is present with an IGNORE action, the modifier + * will be ignored.
+ * Example: 👦🏿 will be replaced by :boy:🏿
+ * + * @param input the string to parse + * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers + * + * @return the string with the emojis replaced by their alias. + */ + public static String parseToAliases( + String input, + final FitzpatrickAction fitzpatrickAction + ) { + EmojiTransformer emojiTransformer = new EmojiTransformer() { + public String transform(UnicodeCandidate unicodeCandidate) { + switch (fitzpatrickAction) { + default: + case PARSE: + if (unicodeCandidate.hasFitzpatrick()) { + return ":" + + unicodeCandidate.getEmoji().getAliases().get(0) + + "|" + + unicodeCandidate.getFitzpatrickType() + + ":"; + } + case REMOVE: + return ":" + + unicodeCandidate.getEmoji().getAliases().get(0) + + ":"; + case IGNORE: + return ":" + + unicodeCandidate.getEmoji().getAliases().get(0) + + ":" + + unicodeCandidate.getFitzpatrickUnicode(); + } + } + }; + + return parseFromUnicode(input, emojiTransformer); + } + + /** + * Replace all emojis with character + * + * @param str the string to process + * @param replacementString replacement the string that will replace all the emojis + * @return the string with replaced character + */ + public static String replaceAllEmojis(String str, final String replacementString) { + EmojiParser.EmojiTransformer emojiTransformer = new EmojiParser.EmojiTransformer() { + public String transform(EmojiParser.UnicodeCandidate unicodeCandidate) { + return replacementString; + } + }; + + return parseFromUnicode(str, emojiTransformer); + } + + + /** + * Replaces the emoji's aliases (between 2 ':') occurrences and the html + * representations by their unicode.
+ * Examples:
+ * :smile: will be replaced by 😄
+ * &#128516; will be replaced by 😄
+ * :boy|type_6: will be replaced by šŸ‘¦šŸæ + * + * @param input the string to parse + * + * @return the string with the aliases and html representations replaced by + * their unicode. + */ + public static String parseToUnicode(String input) { + StringBuilder sb = new StringBuilder(input.length()); + + for (int last = 0; last < input.length(); last++) { + AliasCandidate alias = getAliasAt(input, last); + if (alias == null) { + alias = getHtmlEncodedEmojiAt(input, last); + } + + if (alias != null) { + sb.append(alias.emoji.getUnicode()); + last = alias.endIndex; + + if (alias.fitzpatrick != null) { + sb.append(alias.fitzpatrick.unicode); + } + } else { + sb.append(input.charAt(last)); + } + } + + return sb.toString(); + } + + /** Finds the alias in the given string starting at the given point, null otherwise */ + protected static AliasCandidate getAliasAt(String input, int start) { + if (input.length() < start + 2 || input.charAt(start) != ':') return null; // Aliases start with : + int aliasEnd = input.indexOf(':', start + 2); // Alias must be at least 1 char in length + if (aliasEnd == -1) return null; // No alias end found + + int fitzpatrickStart = input.indexOf('|', start + 2); + if (fitzpatrickStart != -1 && fitzpatrickStart < aliasEnd) { + Emoji emoji = EmojiManager.getForAlias(input.substring(start, fitzpatrickStart)); + if (emoji == null) return null; // Not a valid alias + if (!emoji.supportsFitzpatrick()) return null; // Fitzpatrick was specified, but the emoji does not support it + Fitzpatrick fitzpatrick = Fitzpatrick.fitzpatrickFromType(input.substring(fitzpatrickStart + 1, aliasEnd)); + return new AliasCandidate(emoji, fitzpatrick, start, aliasEnd); + } + + Emoji emoji = EmojiManager.getForAlias(input.substring(start, aliasEnd)); + if (emoji == null) return null; // Not a valid alias + return new AliasCandidate(emoji, null, start, aliasEnd); + } + + /** Finds the HTML encoded emoji in the given string starting at the given point, null otherwise */ + 
protected static AliasCandidate getHtmlEncodedEmojiAt(String input, int start) { + if (input.length() < start + 4 || input.charAt(start) != '&' || input.charAt(start + 1) != '#') return null; + + Emoji longestEmoji = null; + int longestCodePointEnd = -1; + char[] chars = new char[EmojiManager.EMOJI_TRIE.maxDepth]; + int charsIndex = 0; + int codePointStart = start; + do { + int codePointEnd = input.indexOf(';', codePointStart + 3); // Code point must be at least 1 char in length + if (codePointEnd == -1) break; + + try { + int radix = input.charAt(codePointStart + 2) == 'x' ? 16 : 10; + int codePoint = Integer.parseInt(input.substring(codePointStart + 2 + radix / 16, codePointEnd), radix); + charsIndex += Character.toChars(codePoint, chars, charsIndex); + } catch (NumberFormatException e) { + break; + } catch (IllegalArgumentException e) { + break; + } + Emoji foundEmoji = EmojiManager.EMOJI_TRIE.getEmoji(chars, 0, charsIndex); + if (foundEmoji != null) { + longestEmoji = foundEmoji; + longestCodePointEnd = codePointEnd; + } + codePointStart = codePointEnd + 1; + } while (input.length() > codePointStart + 4 && + input.charAt(codePointStart) == '&' && + input.charAt(codePointStart + 1) == '#' && + charsIndex < chars.length && + !EmojiManager.EMOJI_TRIE.isEmoji(chars, 0, charsIndex).isImpossibleMatch()); + + if (longestEmoji == null) return null; + return new AliasCandidate(longestEmoji, null, start, longestCodePointEnd); + } + + /** + * See {@link #parseToHtmlDecimal(String, FitzpatrickAction)} with the action + * "PARSE" + * + * @param input the string to parse + * + * @return the string with the emojis replaced by their html decimal + * representation. + */ + public static String parseToHtmlDecimal(String input) { + return parseToHtmlDecimal(input, FitzpatrickAction.PARSE); + } + + /** + * Replaces the emoji's unicode occurrences by their html representation.
+ * Example: 😄 will be replaced by &#128516;
+ *
+ * When a fitzpatrick modifier is present with a PARSE or REMOVE action, the + * modifier will be deleted from the string.
+ * Example: 👦🏿 will be replaced by + * &#128102;
+ *
+ * When a fitzpatrick modifier is present with an IGNORE action, the modifier + * will be ignored and will remain in the string.
+ * Example: šŸ‘¦šŸæ will be replaced by + * &#128102;šŸæ + * + * @param input the string to parse + * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers + * + * @return the string with the emojis replaced by their html decimal + * representation. + */ + public static String parseToHtmlDecimal( + String input, + final FitzpatrickAction fitzpatrickAction + ) { + EmojiTransformer emojiTransformer = new EmojiTransformer() { + public String transform(UnicodeCandidate unicodeCandidate) { + switch (fitzpatrickAction) { + default: + case PARSE: + case REMOVE: + return unicodeCandidate.getEmoji().getHtmlDecimal(); + case IGNORE: + return unicodeCandidate.getEmoji().getHtmlDecimal() + + unicodeCandidate.getFitzpatrickUnicode(); + } + } + }; + + return parseFromUnicode(input, emojiTransformer); + } + + /** + * See {@link #parseToHtmlHexadecimal(String, FitzpatrickAction)} with the + * action "PARSE" + * + * @param input the string to parse + * + * @return the string with the emojis replaced by their html hex + * representation. + */ + public static String parseToHtmlHexadecimal(String input) { + return parseToHtmlHexadecimal(input, FitzpatrickAction.PARSE); + } + + /** + * Replaces the emoji's unicode occurrences by their html hex + * representation.
+ * Example: 👦 will be replaced by &#x1f466;
+ *
+ * When a fitzpatrick modifier is present with a PARSE or REMOVE action, the + * modifier will be deleted.
+ * Example: 👦🏿 will be replaced by + * &#x1f466;
+ *
+ * When a fitzpatrick modifier is present with an IGNORE action, the modifier + * will be ignored and will remain in the string.
+ * Example: šŸ‘¦šŸæ will be replaced by + * &#x1f466;šŸæ + * + * @param input the string to parse + * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers + * + * @return the string with the emojis replaced by their html hex + * representation. + */ + public static String parseToHtmlHexadecimal( + String input, + final FitzpatrickAction fitzpatrickAction + ) { + EmojiTransformer emojiTransformer = new EmojiTransformer() { + public String transform(UnicodeCandidate unicodeCandidate) { + switch (fitzpatrickAction) { + default: + case PARSE: + case REMOVE: + return unicodeCandidate.getEmoji().getHtmlHexadecimal(); + case IGNORE: + return unicodeCandidate.getEmoji().getHtmlHexadecimal() + + unicodeCandidate.getFitzpatrickUnicode(); + } + } + }; + + return parseFromUnicode(input, emojiTransformer); + } + + /** + * Removes all emojis from a String + * + * @param str the string to process + * + * @return the string without any emoji + */ + public static String removeAllEmojis(String str) { + EmojiTransformer emojiTransformer = new EmojiTransformer() { + public String transform(UnicodeCandidate unicodeCandidate) { + return ""; + } + }; + + return parseFromUnicode(str, emojiTransformer); + } + + + /** + * Removes a set of emojis from a String + * + * @param str the string to process + * @param emojisToRemove the emojis to remove from this string + * + * @return the string without the emojis that were removed + */ + public static String removeEmojis( + String str, + final Collection emojisToRemove + ) { + EmojiTransformer emojiTransformer = new EmojiTransformer() { + public String transform(UnicodeCandidate unicodeCandidate) { + if (!emojisToRemove.contains(unicodeCandidate.getEmoji())) { + return unicodeCandidate.getEmoji().getUnicode() + + unicodeCandidate.getFitzpatrickUnicode(); + } + return ""; + } + }; + + return parseFromUnicode(str, emojiTransformer); + } + + /** + * Removes all the emojis in a String except a provided set + * + * @param str 
the string to process + * @param emojisToKeep the emojis to keep in this string + * + * @return the string without the emojis that were removed + */ + public static String removeAllEmojisExcept( + String str, + final Collection emojisToKeep + ) { + EmojiTransformer emojiTransformer = new EmojiTransformer() { + public String transform(UnicodeCandidate unicodeCandidate) { + if (emojisToKeep.contains(unicodeCandidate.getEmoji())) { + return unicodeCandidate.getEmoji().getUnicode() + + unicodeCandidate.getFitzpatrickUnicode(); + } + return ""; + } + }; + + return parseFromUnicode(str, emojiTransformer); + } + + + /** + * Detects all unicode emojis in input string and replaces them with the + * return value of transformer.transform() + * + * @param input the string to process + * @param transformer emoji transformer to apply to each emoji + * + * @return input string with all emojis transformed + */ + public static String parseFromUnicode( + String input, + EmojiTransformer transformer + ) { + int prev = 0; + StringBuilder sb = new StringBuilder(input.length()); + List replacements = getUnicodeCandidates(input); + for (UnicodeCandidate candidate : replacements) { + sb.append(input, prev, candidate.getEmojiStartIndex()); + + sb.append(transformer.transform(candidate)); + prev = candidate.getFitzpatrickEndIndex(); + } + + return sb.append(input.substring(prev)).toString(); + } + + public static List extractEmojis(String input) { + List emojis = getUnicodeCandidates(input); + List result = new ArrayList(); + for (UnicodeCandidate emoji : emojis) { + if (emoji.getEmoji().supportsFitzpatrick() && emoji.hasFitzpatrick()) { + result.add(emoji.getEmoji().getUnicode(emoji.getFitzpatrick())); + } else { + result.add(emoji.getEmoji().getUnicode()); + } + } + return result; + } + + + /** + * Generates a list UnicodeCandidates found in input string. 
A + * UnicodeCandidate is created for every unicode emoticon found in input + * string, additionally if Fitzpatrick modifier follows the emoji, it is + * included in UnicodeCandidate. Finally, it contains start and end index of + * unicode emoji itself (WITHOUT Fitzpatrick modifier whether it is there or + * not!). + * + * @param input String to find all unicode emojis in + * @return List of UnicodeCandidates for each unicode emote in text + */ + protected static List getUnicodeCandidates(String input) { + char[] inputCharArray = input.toCharArray(); + List candidates = new ArrayList(); + UnicodeCandidate next; + for (int i = 0; (next = getNextUnicodeCandidate(inputCharArray, i)) != null; i = next.getFitzpatrickEndIndex()) { + candidates.add(next); + } + + return candidates; + } + + /** + * Finds the next UnicodeCandidate after a given starting index + * + * @param chars char array to find UnicodeCandidate in + * @param start starting index for search + * @return the next UnicodeCandidate or null if no UnicodeCandidate is found after start index + */ + protected static UnicodeCandidate getNextUnicodeCandidate(char[] chars, int start) { + for (int i = start; i < chars.length; i++) { + int emojiEnd = getEmojiEndPos(chars, i); + + if (emojiEnd != -1) { + Emoji emoji = EmojiManager.getByUnicode(new String(chars, i, emojiEnd - i)); + String fitzpatrickString = (emojiEnd + 2 <= chars.length) ? + new String(chars, emojiEnd, 2) : + null; + return new UnicodeCandidate( + emoji, + fitzpatrickString, + i + ); + } + } + + return null; + } + + + /** + * Returns end index of a unicode emoji if it is found in text starting at + * index startPos, -1 if not found. 
+ * This returns the longest matching emoji, for example, in + * "\uD83D\uDC68\u200D\uD83D\uDC69\u200D\uD83D\uDC66" + * it will find alias:family_man_woman_boy, NOT alias:man + * + * @param text the current text where we are looking for an emoji + * @param startPos the position in the text where we should start looking for + * an emoji end + * + * @return the end index of the unicode emoji starting at startPos. -1 if not + * found + */ + protected static int getEmojiEndPos(char[] text, int startPos) { + int best = -1; + for (int j = startPos + 1; j <= text.length; j++) { + EmojiTrie.Matches status = EmojiManager.EMOJI_TRIE.isEmoji(text, startPos, j); + if (status.isExactMatch()) { + best = j; + } else if (status.isImpossibleMatch()) { + return best; + } + } + + return best; + } + + + public static class UnicodeCandidate { + private final Emoji emoji; + private final Fitzpatrick fitzpatrick; + private final int startIndex; + + private UnicodeCandidate(Emoji emoji, String fitzpatrick, int startIndex) { + this.emoji = emoji; + this.fitzpatrick = Fitzpatrick.fitzpatrickFromUnicode(fitzpatrick); + this.startIndex = startIndex; + } + + public Emoji getEmoji() { + return emoji; + } + + public boolean hasFitzpatrick() { + return getFitzpatrick() != null; + } + + public Fitzpatrick getFitzpatrick() { + return fitzpatrick; + } + + public String getFitzpatrickType() { + return hasFitzpatrick() ? fitzpatrick.name().toLowerCase() : ""; + } + + public String getFitzpatrickUnicode() { + return hasFitzpatrick() ? fitzpatrick.unicode : ""; + } + + public int getEmojiStartIndex() { + return startIndex; + } + + public int getEmojiEndIndex() { + return startIndex + emoji.getUnicode().length(); + } + + public int getFitzpatrickEndIndex() { + return getEmojiEndIndex() + (fitzpatrick != null ? 
2 : 0); + } + } + + + protected static class AliasCandidate { + public final Emoji emoji; + public final Fitzpatrick fitzpatrick; + public final int startIndex; + public final int endIndex; + + private AliasCandidate(Emoji emoji, Fitzpatrick fitzpatrick, int startIndex, int endIndex) { + this.emoji = emoji; + this.fitzpatrick = fitzpatrick; + this.startIndex = startIndex; + this.endIndex = endIndex; + } + } + + /** + * Enum used to indicate what should be done when a Fitzpatrick modifier is + * found. + */ + public enum FitzpatrickAction { + /** + * Tries to match the Fitzpatrick modifier with the previous emoji + */ + PARSE, + + /** + * Removes the Fitzpatrick modifier from the string + */ + REMOVE, + + /** + * Ignores the Fitzpatrick modifier (it will stay in the string) + */ + IGNORE + } + + public interface EmojiTransformer { + String transform(UnicodeCandidate unicodeCandidate); + } +} diff --git a/src/main/java/com/vdurmont/emoji/EmojiTrie.java b/src/main/java/com/vdurmont/emoji/EmojiTrie.java index 6e59ea15..e68e56af 100644 --- a/src/main/java/com/vdurmont/emoji/EmojiTrie.java +++ b/src/main/java/com/vdurmont/emoji/EmojiTrie.java @@ -1,142 +1,142 @@ -package com.vdurmont.emoji; - -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; - -public class EmojiTrie { - private final Node root = new Node(); - final int maxDepth; - - public EmojiTrie(Collection emojis) { - int maxDepth = 0; - for (Emoji emoji : emojis) { - Node tree = root; - char[] chars = emoji.getUnicode().toCharArray(); - maxDepth = Math.max(maxDepth, chars.length); - for (char c: chars) { - if (!tree.hasChild(c)) { - tree.addChild(c); - } - tree = tree.getChild(c); - } - tree.setEmoji(emoji); - } - this.maxDepth = maxDepth; - } - - - /** - * Checks if sequence of chars contain an emoji. - * - * @param sequence Sequence of char that may contain emoji in full or - * partially. 
- * - * @return - * <li> - * Matches.EXACTLY if char sequence in its entirety is an emoji - * </li> - * <li> - * Matches.POSSIBLY if char sequence matches prefix of an emoji - * </li> - * <li> - * Matches.IMPOSSIBLE if char sequence matches no emoji or prefix of an - * emoji - * </li> - */ - public Matches isEmoji(char[] sequence) { - return isEmoji(sequence, 0, sequence.length); - } - - /** - * Checks if the sequence of chars within the given bound indices contain an emoji. - * @see #isEmoji(char[]) - */ - public Matches isEmoji(char[] sequence, int start, int end) { - if (start < 0 || start > end || end > sequence.length) { - throw new ArrayIndexOutOfBoundsException( - "start " + start + ", end " + end + ", length " + sequence.length); - } - - if (sequence == null) { - return Matches.POSSIBLY; - } - - Node tree = root; - for (int i = start; i < end; i++) { - if (!tree.hasChild(sequence[i])) { - return Matches.IMPOSSIBLE; - } - tree = tree.getChild(sequence[i]); - } - - return tree.isEndOfEmoji() ? Matches.EXACTLY : Matches.POSSIBLY; - } - - - /** - * Finds Emoji instance from emoji unicode - * @param unicode unicode of emoji to get - * @return Emoji instance if unicode matches and emoji, null otherwise. 
- */ - public Emoji getEmoji(String unicode) { - return getEmoji(unicode.toCharArray(), 0, unicode.length()); - } - - Emoji getEmoji(char[] sequence, int start, int end) { - if (start < 0 || start > end || end > sequence.length) { - throw new ArrayIndexOutOfBoundsException( - "start " + start + ", end " + end + ", length " + sequence.length); - } - - Node tree = root; - for (int i = 0; i < end; i++) { - if (!tree.hasChild(sequence[i])) { - return null; - } - tree = tree.getChild(sequence[i]); - } - return tree.getEmoji(); - } - - public enum Matches { - EXACTLY, POSSIBLY, IMPOSSIBLE; - - public boolean exactMatch() { - return this == EXACTLY; - } - - public boolean impossibleMatch() { - return this == IMPOSSIBLE; - } - } - - private class Node { - private Map children = new HashMap(); - private Emoji emoji; - - private void setEmoji(Emoji emoji) { - this.emoji = emoji; - } - - private Emoji getEmoji() { - return emoji; - } - - private boolean hasChild(char child) { - return children.containsKey(child); - } - - private void addChild(char child) { - children.put(child, new Node()); - } - - private Node getChild(char child) { - return children.get(child); - } - - private boolean isEndOfEmoji() { - return emoji != null; - } - } -} +package com.vdurmont.emoji; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +public class EmojiTrie { + private final Node root = new Node(); + final int maxDepth; + + public EmojiTrie(Collection emojis) { + int maxDepth = 0; + for (Emoji emoji : emojis) { + Node tree = root; + char[] chars = emoji.getUnicode().toCharArray(); + maxDepth = Math.max(maxDepth, chars.length); + for (char c: chars) { + if (!tree.hasChild(c)) { + tree.addChild(c); + } + tree = tree.getChild(c); + } + tree.setEmoji(emoji); + } + this.maxDepth = maxDepth; + } + + + /** + * Checks if sequence of chars contain an emoji. + * + * @param sequence Sequence of char that may contain emoji in full or + * partially. 
+     *
+     * @return
+     * <li>
+     * Matches.EXACTLY if char sequence in its entirety is an emoji
+     * </li>
+     * <li>
+     * Matches.POSSIBLY if char sequence matches prefix of an emoji
+     * </li>
+     * <li>
+     * Matches.IMPOSSIBLE if char sequence matches no emoji or prefix of an
+     * emoji
+     * </li>
+     */
+    public Matches isEmoji(char[] sequence) {
+        return isEmoji(sequence, 0, sequence == null ? 0 : sequence.length); // null is answered by the bounded overload
+    }
+
+    /**
+     * Checks if the chars of sequence in [start, end) form an emoji or an emoji prefix; a null sequence yields POSSIBLY.
+     * @see #isEmoji(char[])
+     */
+    public Matches isEmoji(char[] sequence, int start, int end) {
+        if (sequence == null) { // must run before the bounds check, which reads sequence.length
+            return Matches.POSSIBLY;
+        }
+
+        if (start < 0 || start > end || end > sequence.length) {
+            throw new ArrayIndexOutOfBoundsException(
+                    "start " + start + ", end " + end + ", length " + sequence.length);
+        }
+
+        Node tree = root;
+        for (int i = start; i < end; i++) {
+            if (!tree.hasChild(sequence[i])) {
+                return Matches.IMPOSSIBLE;
+            }
+            tree = tree.getChild(sequence[i]);
+        }
+
+        return tree.isEndOfEmoji() ? Matches.EXACTLY : Matches.POSSIBLY;
+    }
+
+
+    /**
+     * Finds Emoji instance from emoji unicode
+     * @param unicode unicode of emoji to get
+     * @return Emoji instance if unicode matches an emoji, null otherwise. 
+     */
+    public Emoji getEmoji(String unicode) {
+        return getEmoji(unicode.toCharArray(), 0, unicode.length());
+    }
+
+    Emoji getEmoji(char[] sequence, int start, int end) { // looks up sequence[start, end); null when no emoji is stored for those chars
+        if (start < 0 || start > end || end > sequence.length) {
+            throw new ArrayIndexOutOfBoundsException(
+                    "start " + start + ", end " + end + ", length " + sequence.length);
+        }
+
+        Node tree = root;
+        for (int i = start; i < end; i++) { // walk only the requested window; starting at 0 ignored the start bound
+            if (!tree.hasChild(sequence[i])) {
+                return null;
+            }
+            tree = tree.getChild(sequence[i]);
+        }
+        return tree.getEmoji();
+    }
+
+    public enum Matches {
+        EXACTLY, POSSIBLY, IMPOSSIBLE;
+
+        public boolean isExactMatch() {
+            return this == EXACTLY;
+        }
+
+        public boolean isImpossibleMatch() {
+            return this == IMPOSSIBLE;
+        }
+    }
+
+    private class Node {
+        private Map<Character, Node> children = new HashMap<>(); // typed so getChild() can return Node without a cast
+        private Emoji emoji;
+
+        private void setEmoji(Emoji emoji) {
+            this.emoji = emoji;
+        }
+
+        private Emoji getEmoji() {
+            return emoji;
+        }
+
+        private boolean hasChild(char child) {
+            return children.containsKey(child);
+        }
+
+        private void addChild(char child) {
+            children.put(child, new Node());
+        }
+
+        private Node getChild(char child) {
+            return children.get(child);
+        }
+
+        private boolean isEndOfEmoji() {
+            return emoji != null;
+        }
+    }
+}
diff --git a/src/main/java/com/vdurmont/emoji/Fitzpatrick.java b/src/main/java/com/vdurmont/emoji/Fitzpatrick.java
index bdbcbc65..919d89ba 100644
--- a/src/main/java/com/vdurmont/emoji/Fitzpatrick.java
+++ b/src/main/java/com/vdurmont/emoji/Fitzpatrick.java
@@ -1,58 +1,58 @@
-package com.vdurmont.emoji;
-
-/**
- * Enum that represents the Fitzpatrick modifiers supported by the emojis. 
- */ -public enum Fitzpatrick { - /** - * Fitzpatrick modifier of type 1/2 (pale white/white) - */ - TYPE_1_2("\uD83C\uDFFB"), - - /** - * Fitzpatrick modifier of type 3 (cream white) - */ - TYPE_3("\uD83C\uDFFC"), - - /** - * Fitzpatrick modifier of type 4 (moderate brown) - */ - TYPE_4("\uD83C\uDFFD"), - - /** - * Fitzpatrick modifier of type 5 (dark brown) - */ - TYPE_5("\uD83C\uDFFE"), - - /** - * Fitzpatrick modifier of type 6 (black) - */ - TYPE_6("\uD83C\uDFFF"); - - /** - * The unicode representation of the Fitzpatrick modifier - */ - public final String unicode; - - Fitzpatrick(String unicode) { - this.unicode = unicode; - } - - - public static Fitzpatrick fitzpatrickFromUnicode(String unicode) { - for (Fitzpatrick v : values()) { - if (v.unicode.equals(unicode)) { - return v; - } - } - return null; - } - - public static Fitzpatrick fitzpatrickFromType(String type) { - try { - return Fitzpatrick.valueOf(type.toUpperCase()); - } catch (IllegalArgumentException e) { - return null; - } - } -} +package com.vdurmont.emoji; + +/** + * Enum that represents the Fitzpatrick modifiers supported by the emojis. 
+ */ +public enum Fitzpatrick { + /** + * Fitzpatrick modifier of type 1/2 (pale white/white) + */ + TYPE_1_2("\uD83C\uDFFB"), + + /** + * Fitzpatrick modifier of type 3 (cream white) + */ + TYPE_3("\uD83C\uDFFC"), + + /** + * Fitzpatrick modifier of type 4 (moderate brown) + */ + TYPE_4("\uD83C\uDFFD"), + + /** + * Fitzpatrick modifier of type 5 (dark brown) + */ + TYPE_5("\uD83C\uDFFE"), + + /** + * Fitzpatrick modifier of type 6 (black) + */ + TYPE_6("\uD83C\uDFFF"); + + /** + * The unicode representation of the Fitzpatrick modifier + */ + public final String unicode; + + Fitzpatrick(String unicode) { + this.unicode = unicode; + } + + + public static Fitzpatrick fitzpatrickFromUnicode(String unicode) { + for (Fitzpatrick v : values()) { + if (v.unicode.equals(unicode)) { + return v; + } + } + return null; + } + + public static Fitzpatrick fitzpatrickFromType(String type) { + try { + return Fitzpatrick.valueOf(type.toUpperCase()); + } catch (IllegalArgumentException e) { + return null; + } + } +} diff --git a/src/main/java/com/vdurmont/emoji/TableGenerator.java b/src/main/java/com/vdurmont/emoji/TableGenerator.java new file mode 100644 index 00000000..34c6f2ec --- /dev/null +++ b/src/main/java/com/vdurmont/emoji/TableGenerator.java @@ -0,0 +1,67 @@ +package com.vdurmont.emoji; + +import java.io.FileWriter; +import java.io.IOException; + +/** + * This app generate the emoji table in the README ;) + *

+ * Run with:
+ * mvn exec:java -Dexec.mainClass="com.vdurmont.emoji.TableGenerator"
+ */
+public class TableGenerator {
+    public static void main(String[] args) throws IOException {
+        StringBuilder sb = new StringBuilder();
+
+        // Table header
+        sb.append("| Emoji | Aliases | Emoji | Aliases |\n");
+        sb.append("| :---: | ------- | :---: | ------- |\n");
+
+        // Emojis! Two emojis per table row: even index opens the row, odd index closes it.
+        int i = 0;
+        for (Emoji emoji : EmojiManager.getAll()) {
+            String aliases = getAliases(emoji);
+
+            if (i % 2 == 0) {
+                sb.append("| ")
+                        .append(emoji.getUnicode())
+                        .append(" | ")
+                        .append(aliases)
+                        .append(" |");
+            } else {
+                sb.append(" ")
+                        .append(emoji.getUnicode())
+                        .append(" | ")
+                        .append(aliases)
+                        .append(" |\n");
+            }
+
+            i++;
+        }
+
+        // Output! First argument, when present, is the file to write; otherwise print to stdout.
+        if (args.length > 0) {
+            try (FileWriter writer = new FileWriter(args[0], java.nio.charset.StandardCharsets.UTF_8)) { // emoji need UTF-8, not the platform default
+                writer.write(sb.toString());
+            } // writer is closed (and flushed) here, before success is reported
+            System.out.println("Written on " + args[0]);
+        } else {
+            System.out.println(sb.toString());
+        }
+    }
+
+    private static String getAliases(Emoji emoji) { // joins the emoji's aliases with ", "
+        StringBuilder result = new StringBuilder();
+        boolean first = true;
+        for (String alias : emoji.getAliases()) {
+            if (first) {
+                first = false;
+            } else {
+                result.append(", ");
+            }
+            result.append(alias);
+        }
+
+        return result.toString();
+    }
+}
diff --git a/src/main/java/com/vdurmont/emoji/validator/EmojiValidator.java b/src/main/java/com/vdurmont/emoji/validator/EmojiValidator.java
new file mode 100644
index 00000000..2047e90b
--- /dev/null
+++ b/src/main/java/com/vdurmont/emoji/validator/EmojiValidator.java
@@ -0,0 +1,5 @@
+package com.vdurmont.emoji.validator;
+
+public interface EmojiValidator {
+    boolean isValid(String string);
+}
diff --git a/src/main/java/com/vdurmont/emoji/validator/NonNullAndEmptyValidator.java b/src/main/java/com/vdurmont/emoji/validator/NonNullAndEmptyValidator.java
new file mode 100644
index 00000000..b3ad8fda
--- /dev/null
+++ b/src/main/java/com/vdurmont/emoji/validator/NonNullAndEmptyValidator.java
@@ -0,0 +1,10 @@
+package 
com.vdurmont.emoji.validator; + +import com.vdurmont.emoji.EmojiParser; + +public class NonNullAndEmptyValidator implements EmojiValidator { + @Override + public boolean isValid(String string) { + return string != null && EmojiParser.removeAllEmojis(string).isEmpty(); + } +} diff --git a/src/test/java/com/vdurmont/emoji/EmojiManagerTest.java b/src/test/java/com/vdurmont/emoji/EmojiManagerTest.java index 10918386..999be4ab 100644 --- a/src/test/java/com/vdurmont/emoji/EmojiManagerTest.java +++ b/src/test/java/com/vdurmont/emoji/EmojiManagerTest.java @@ -274,4 +274,26 @@ public void getByUnicode_returns_correct_emoji() { assertEquals(wavingHand, e.getUnicode()); assertEquals("waving hand sign", e.getDescription()); } + + @Test + public void EmojiManagerTest_with_depicts_a_person_swimming() { + String depicts_a_person_swimming = "\uD83C\uDFCA\u200D♂\uFE0F"; //šŸŠā€ā™‚ļø + boolean isItEmoji = EmojiManager.isEmoji(depicts_a_person_swimming); + assertFalse(isItEmoji); + } + + @Test + public void EmojiManagerTest_with_woman_rowing_boat() { + String woman_rowing_boat = "\uD83D\uDEA3\u200D♀\uFE0F"; //šŸš£ā€ā™€ļø + boolean isItEmoji = EmojiManager.isEmoji(woman_rowing_boat); + assertFalse(isItEmoji); + } + + + @Test + public void EmojiManagerTest_with_skier() { + String Skier = "ā›·\uFE0F";//ā›·ļø + boolean isItEmoji = EmojiManager.isEmoji(Skier); + assertFalse(isItEmoji); + } } diff --git a/src/test/java/com/vdurmont/emoji/EmojiParserTest.java b/src/test/java/com/vdurmont/emoji/EmojiParserTest.java index 2965cebb..46f25c9a 100644 --- a/src/test/java/com/vdurmont/emoji/EmojiParserTest.java +++ b/src/test/java/com/vdurmont/emoji/EmojiParserTest.java @@ -9,6 +9,7 @@ import java.util.ArrayList; import java.util.List; +import static junit.framework.TestCase.assertNotSame; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; @@ -550,4 +551,11 @@ public void parseToAliases_with_first_medal() { // 
THEN assertEquals(":first_place_medal:", result); } + + @Test + public void testParserWithTwoEmojisButGettingForFour() { + String input = "I'm šŸ‘ØšŸ½ā€šŸ”¬ and she's šŸ‘©šŸ»ā€šŸ’»"; + List emojis = EmojiParser.extractEmojis(input); + assertNotSame("Expected 2 but given 4", 2, emojis.size()); + } } diff --git a/src/test/java/com/vdurmont/emoji/EmojiTrieTest.java b/src/test/java/com/vdurmont/emoji/EmojiTrieTest.java new file mode 100644 index 00000000..c243cfaa --- /dev/null +++ b/src/test/java/com/vdurmont/emoji/EmojiTrieTest.java @@ -0,0 +1,29 @@ +package com.vdurmont.emoji; + +import org.junit.Test; + +import static com.vdurmont.emoji.EmojiManager.isEmoji; +import static org.junit.Assert.assertEquals; + +public class EmojiTrieTest { + @Test + public void testIsEmoji_ValidEmoji() { + char[] sequence = {'\uD83D', '\uDE0A', 'a', 'b'}; + EmojiTrie.Matches result = isEmoji(sequence); + assertEquals(EmojiTrie.Matches.IMPOSSIBLE, result); + } + + @Test + public void testIsEmoji_NoEmoji() { + char[] sequence = {'a', 'b', 'c'}; + EmojiTrie.Matches result = isEmoji(sequence); + assertEquals(EmojiTrie.Matches.IMPOSSIBLE, result); + } + + @Test + public void testIsEmoji_EmptySequence() { + char[] sequence = {}; + EmojiTrie.Matches result = isEmoji(sequence); + assertEquals(EmojiTrie.Matches.POSSIBLY, result); + } +}