-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWordSenseDisambiguation.py
54 lines (45 loc) · 1.5 KB
/
WordSenseDisambiguation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 14 13:14:10 2018
@author: tharunngolthi
"""
from nltk.corpus import wordnet as wn
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
def SimplifiedLesk(word, sentence):
    """Pick the WordNet sense of *word* that best matches *sentence*.

    Each candidate synset is scored by the word overlap between the
    sentence and the synset's gloss/examples, plus the overlaps of the
    synset's direct hyponyms.

    Args:
        word: The (possibly inflected) target word to disambiguate.
        sentence: The context sentence containing the word.

    Returns:
        The synset with the highest strictly-positive overlap score, or
        ``None`` when the word has no synsets or no synset overlaps the
        sentence at all. On ties the earliest synset returned by WordNet
        wins.
    """
    # morphy() yields None when it cannot reduce the word to a known base
    # form; passing None on to synsets() would fail, so fall back to the
    # word as typed.
    lemma = wn.morphy(word) or word

    best_sense = None
    max_overlap = 0
    for sense in wn.synsets(lemma):
        overlap = ComputeOverlap(sense, sentence)
        # Hyponyms (more specific senses) contribute their overlap to the
        # parent sense's score.
        for hyponym in sense.hyponyms():
            overlap += ComputeOverlap(hyponym, sentence)
        # Strict '>' keeps the first-listed sense on ties and leaves
        # best_sense as None when every score is zero.
        if overlap > max_overlap:
            max_overlap = overlap
            best_sense = sense
    return best_sense
def ComputeOverlap(synset, sentence):
    """Count distinct non-stopword tokens shared by a synset and a sentence.

    The synset's "signature" is the set of word tokens from its definition
    and all of its usage examples, minus English stopwords. The sentence is
    tokenized the same way so punctuation and letter case do not prevent a
    match.

    Args:
        synset: A WordNet synset providing ``definition()`` and
            ``examples()``.
        sentence: The context sentence as a plain string.

    Returns:
        The number of distinct tokens present in both the signature and
        the sentence.
    """
    signature = set(tokenizer.tokenize(synset.definition().lower()))
    for example in synset.examples():
        # Tokenize each example: unioning the raw string would add its
        # individual characters to the set instead of its words.
        signature.update(tokenizer.tokenize(example.lower()))
    # Stopwords are removed from the signature only; that is sufficient
    # because the result is an intersection.
    signature -= stopwordset

    context = set(tokenizer.tokenize(sentence.lower()))
    return len(signature & context)
# Shared word tokenizer: extracts runs of word characters, dropping
# punctuation and whitespace. Raw string so the backslash in \w reaches the
# regex engine without being treated as a (invalid) string escape.
tokenizer = RegexpTokenizer(r'\w+')
# English stopwords as a set for O(1) membership / fast set difference.
stopwordset = set(stopwords.words('english'))
if __name__ == '__main__':
    # Interactive entry point: read a sentence and a target word, then
    # report the sense chosen by SimplifiedLesk.
    sentence = input("Enter sentence:")
    word = input("Enter word:")
    lesk = SimplifiedLesk(word, sentence)
    print("**************************", end="\n")
    print("Final Chosen Sense:", end=" ")
    if lesk is not None:
        print(lesk)
        print("Definition: ", lesk.definition(), end="\n")
        print("Examples:")
        for example in lesk.examples():
            print("=> ", example)
    else:
        # Finish the "Final Chosen Sense:" line instead of leaving it
        # dangling when no sense overlapped the sentence.
        print("None (no matching sense found)")