WordNet is strict about antonyms, but you can get more by using the
similar_to relationship.
Here is some code that gets the expanded antonym sets (and more):
#!/usr/bin/python
#
# list pairs of antonymous synsets
#
import nltk
from nltk.corpus import wordnet as wn
# Language code passed as ``lang=`` to ``lemma_names`` when listing opposites.
lng = 'arb'
# When True, each lemma is also paired with the synsets that are similar to
# its antonym's synset (reported with the link type 'ant:sim').
similar = False
def lemma2id(lemma):
    """Return the synset id string of the synset that *lemma* belongs to.

    The id is the synset offset zero-padded to eight digits, a dash and the
    part-of-speech tag (e.g. ``"00001234-n"``).  A trailing ``-s`` tag is
    rewritten to ``-a``, so both tags end up sharing the ``-a`` suffix.
    """
    synset = lemma.synset()
    ssid = "%08d-%s" % (synset.offset(), synset.pos())
    return ssid.replace('-s', '-a')
def ss2id(ss):
    """Return the id string for synset *ss*.

    The id is the synset offset zero-padded to eight digits, a dash and the
    part-of-speech tag (e.g. ``"00001234-n"``).  A trailing ``-s`` tag is
    rewritten to ``-a``, so both tags end up sharing the ``-a`` suffix.
    """
    ssid = "%08d-%s" % (ss.offset(), ss.pos())
    return ssid.replace('-s', '-a')
def id2ss(id):
    """Look up the WordNet synset for an id string such as ``"00001234-n"``.

    The last character of *id* is taken as the part-of-speech tag and the
    first eight characters as the numeric offset.
    """
    # Use the public lookup; the underscore-prefixed name is only kept by
    # NLTK as a deprecated alias for it.
    return wn.synset_from_pos_and_offset(id[-1], int(id[0:8]))
# Every reported pair is stored in both directions ("a<TAB>b" and "b<TAB>a")
# so that the mirror-image pairing, met later in the scan, is recognised.
known = set()


def _emit(left_id, right_id, left_name, right_name, link):
    """Print one tab-separated pair row and record it in ``known``.

    A pair that was already recorded (in either direction) is still printed,
    but prefixed with "# ", and is not recorded again.
    """
    pair = left_id + "\t" + right_id
    riap = right_id + "\t" + left_id
    row = "\t".join([left_id, right_id, left_name, right_name, link])
    if pair in known or riap in known:
        print("# " + row)
    else:
        print(row)
        known.add(pair)
        known.add(riap)


# Walk every lemma of every synset and report each of its antonyms.
for ss in wn.all_synsets():
    for l in ss.lemmas():
        for a in l.antonyms():
            _emit(lemma2id(l), lemma2id(a), l.name(), a.name(), 'ant')
            # NOTE(review): the original indentation was lost; this branch is
            # assumed to run for every antonym, whether or not the direct
            # pair above was new -- confirm against the author's copy.
            if similar:
                for s in a.synset().similar_tos():
                    # s.name()[:-5] drops the last five characters of the
                    # synset name -- presumably the ".<pos>.<nn>" suffix.
                    _emit(lemma2id(l), ss2id(s), l.name(), s.name()[:-5],
                          'ant:sim')
# For every recorded pair, print both synsets followed by their lemma names
# in the language selected by ``lng``.
print('============ find oppsosites for {}'.format(lng))
for pair in known:
    # Each entry of ``known`` is two synset ids joined by a single tab.
    id1, id2 = pair.split('\t')
    s1 = id2ss(id1)
    a1 = s1.lemma_names(lang=lng)
    s2 = id2ss(id2)
    a2 = s2.lemma_names(lang=lng)
    print(s1, s2, a1, a2)
lng = 'eng'
print('============ calculate ambiguity for {}'.format(lng))


def _ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


total = 0        # synset pairs where both sides have lemma names in ``lng``
lemtotal = 0     # sum over those pairs of len(a1) * len(a2)
antonyms = 0     # lemma combinations where l1 is listed in l2.antonyms()
nonantonyms = 0  # all other lemma combinations
for pair in known:
    # Each entry of ``known`` is two synset ids joined by a single tab.
    id1, id2 = pair.split('\t')
    s1 = id2ss(id1)
    a1 = s1.lemma_names(lang=lng)
    s2 = id2ss(id2)
    a2 = s2.lemma_names(lang=lng)
    # NOTE(review): the original indentation was lost; everything up to the
    # per-pair print is assumed to sit under this condition -- confirm.
    if a1 and a2:
        total += 1
        lemtotal += len(a1) * len(a2)
        yes = []
        no = []
        for l1 in s1.lemmas():
            for l2 in s2.lemmas():
                if l1 in l2.antonyms():
                    antonyms += 1
                    yes.append(l1.name() + "↔" + l2.name())
                else:
                    nonantonyms += 1
                    no.append(l1.name() + "↭" + l2.name())
        print(s1, s2, a1, a2, yes, no)
print('-------------')
# The ratios go through _ratio so an empty ``known`` set (or a language with
# no lemma names) prints "nan" instead of raising ZeroDivisionError.
print('Average number of opposites = {}'.format(_ratio(lemtotal, total)))
print('Percentage non antonym opposites = {}'.format(
    _ratio(100 * (lemtotal - total), lemtotal)))
print('Antonyms = {} ({:2.1f}%), Non-Antonyms = {}, Total = {}'.format(
    antonyms,
    _ratio(100 * antonyms, antonyms + nonantonyms),
    nonantonyms,
    antonyms + nonantonyms))
> To unsubscribe from this group and stop receiving emails from it, send an email to
.