[hltdi-l3] push by onlysk...@gmail.com - Search in constraint satisfaction (for both group assignment and outpu... on 2014-05-16 04:03 GMT

1 view
Skip to first unread message

hltd...@googlecode.com

unread,
May 16, 2014, 12:03:45 AM (5/16/14)
to hltdi-...@googlegroups.com
Revision: c5bdc74b96c2
Branch: default
Author: Michael Gasser <gas...@cs.indiana.edu>
Date: Fri May 16 04:02:37 2014 UTC
Log: Search in constraint satisfaction (for both group assignment and
output sequencing).
http://code.google.com/p/hltdi-l3/source/detail?r=c5bdc74b96c2

Modified:
/hiiktuu.py
/hiiktuu/constraint.py
/hiiktuu/cs.py
/hiiktuu/sentence.py
/l3xdg/solver.py
/paperdrafts/lglp/acl.bst
/paperdrafts/lglp/lglp14.pdf
/paperdrafts/lglp/lglp14.tex
/paperdrafts/lglp/mind.png

=======================================
--- /hiiktuu.py Thu May 15 18:53:03 2014 UTC
+++ /hiiktuu.py Fri May 16 04:02:37 2014 UTC
@@ -28,7 +28,7 @@
# 2014.02.09
# -- Created

-__version__ = 0.8
+__version__ = 0.9

import hiiktuu

@@ -44,8 +44,8 @@
s = hiiktuu.Sentence(raw='John kicked the bucket', language=eng,
target=spa,
verbosity=verbosity)
# s.do(verbosity=verbosity)
- s.initialize()
-# s.solve()
+ s.initialize(verbosity=verbosity)
+ s.solve(verbosity=verbosity)
# sol = s.solutions[0]
# sol.translate()
return s
@@ -60,7 +60,9 @@
eng, spa = hiiktuu.Language.load('eng', 'spa')
s = hiiktuu.Sentence(raw="it's the end of the world", language=eng,
target=spa,
verbosity=verbosity)
- s.do(verbosity=verbosity)
+ s.initialize(verbosity=verbosity)
+ s.solve(verbosity=verbosity)
+# s.do(verbosity=verbosity)
return s

def ate_fish(verbosity=0):
@@ -72,7 +74,8 @@
"""
amh, orm = hiiktuu.Language.load('amh', 'orm')
s = hiiktuu.Sentence(raw="አሳ በላ", language=amh, target=orm,
verbosity=verbosity)
- s.do(verbosity=verbosity)
+ s.initialize(verbosity=verbosity)
+ s.solve(verbosity=verbosity)
return s

def never_eaten_fish(verbosity=0, trans=True):
@@ -86,10 +89,9 @@
amh, orm = hiiktuu.Language.load('amh', 'orm')
s = hiiktuu.Sentence(raw="አሳ በልቶ አያውቅም", language=amh, target=orm,
verbosity=verbosity)
+ s.initialize(verbosity=verbosity)
if trans:
-# s.initialize(verbosity=verbosity)
-# s.solve(verbosity=verbosity)
- s.do(verbosity=verbosity)
+ s.solve(verbosity=verbosity)
else:
s.parse(verbosity=verbosity)
return s
@@ -104,10 +106,8 @@
amh, orm = hiiktuu.Language.load('amh', 'orm')
s = hiiktuu.Sentence(raw="አሳ በልተው አያውቅም", language=amh, target=orm,
verbosity=verbosity)
- if trans:
- s.do(verbosity=verbosity)
- else:
- s.parse(verbosity=verbosity)
+ s.initialize(verbosity=verbosity)
+ s.solve(verbosity=verbosity)
return s

def cantar_las_cuarenta(trans=True, verbosity=0):
@@ -124,7 +124,8 @@
s = hiiktuu.Sentence(raw="Paula les cantó las cuarenta",
language=spa, target=eng if trans else None,
verbosity=verbosity)
- s.do(verbosity=verbosity)
+ s.initialize(verbosity=verbosity)
+ s.solve(verbosity=verbosity)
return s

def ui():
=======================================
--- /hiiktuu/constraint.py Thu May 15 18:53:03 2014 UTC
+++ /hiiktuu/constraint.py Fri May 16 04:02:37 2014 UTC
@@ -1681,48 +1681,6 @@
self.constraints.append(UnionSelection(main, sel, seqvars,
record=False,
weight=1, maxset=None))

-## def fails(self, dstore=None):
-## """Fail if any of the UnionSelection constraints over the
selvars and mainvars indexed by the
-## lower bound of selvar fail."""
-## for index in self.selvar.get_lower(dstore=dstore):
-## constraint = self.constraints[index]
-## if constraint and constraint.fails(dstore=dstore):
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Is entailed if all of the constraints indexed by the upper
bound of selvar are entailed."""
-## for index in self.selvar.get_upper(dstore=dstore):
-## constraint = self.constraints[index]
-## if constraint and not constraint.is_entailed(dstore=dstore):
-## return False
-## return True
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=[]):
-## """Run infer() on all constraints indexed in the lower bound of
selvar,
-## and remove indices from the upper bound of selvar if the indexed
constraint
-## fails."""
-## selvar = self.selvar
-## selupper = selvar.get_upper(dstore=dstore)
-## sellower = selvar.get_lower(dstore=dstore)
-## for index, constraint in enumerate(self.constraints):
-## if not constraint:
-## continue
-## if index in sellower:
-## state, changed = constraint.infer(dstore=dstore)
-## # If any variable changed as a result this, return it
-## if changed:
-## return state, changed
-## elif index in selupper:
-## # A constraint indexed by a value in the upper bound of
selvar failed
-## if constraint.fails(dstore=dstore):
-## # Try to remove this index from the upper bound of
selvar
-## if selvar.discard_upper(index, dstore=dstore,
-## constraint=(verbosity>1 or
selvar in tracevar) and self):
-## return Constraint.sleeping, {selvar}
-##
-## return Constraint.sleeping, set()
-
class ComplexSetConvexity(ComplexConstraint):
"""Each value of selection variable (potentially) selects a set
convexity constraint over one of
the seqvars."""
@@ -1740,45 +1698,6 @@
# Don't record this constraint in the variables
self.constraints.append(SetConvexity(cv, weight=weight,
record=False))

-## def fails(self, dstore=None):
-## """Fail if any of the SetConvexity constraints over the
convexvars indexed by the
-## lower bound of selvar fail."""
-## for index in self.selvar.get_lower(dstore=dstore):
-## constraint = self.constraints[index]
-## if constraint.fails(dstore=dstore):
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Is entailed if all of the constraints indexed by the upper
bound of selvar are entailed."""
-## for index in self.selvar.get_upper(dstore=dstore):
-## constraint = self.constraints[index]
-## if not constraint.is_entailed(dstore=dstore):
-## return False
-## return True
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=[]):
-## """Run infer() on all constraints indexed in the lower bound of
selvar,
-## and remove indices from the upper bound of selvar if the indexed
constraint
-## fails."""
-## selvar = self.selvar
-## selupper = selvar.get_upper(dstore=dstore)
-## sellower = selvar.get_lower(dstore=dstore)
-## for index, constraint in enumerate(self.constraints):
-## if index in sellower:
-## state, changed = constraint.infer(dstore=dstore)
-## # If any variable changed as a result this
-## if changed:
-## return state, changed
-## elif index in selupper:
-## if constraint.fails(dstore=dstore):
-## # Try to remove this index from the upper bound of
selvar
-## if selvar.discard_upper(index, dstore=dstore,
-## constraint=(verbosity>1 or
selvar in tracevar) and self):
-## return Constraint.sleeping, {selvar}
-##
-## return Constraint.sleeping, set()
-
class IntersectionSelection(Selection):
'''All variables are set vars. Select the intersection of the selected
sets.'''

@@ -2092,48 +2011,6 @@
# Create an AgrSelection constraint for this agr selection
variable
self.constraints.append(AgrSelection(featvars, sel,
seqvars, record=False, weight=1))

-## def fails(self, dstore=None):
-## """Fail if any of the AgrSelection constraints over the selvars
indexed by the
-## lower bound of selvar fail."""
-## for index in self.selvar.get_lower(dstore=dstore):
-## constraint = self.constraints[index]
-## if constraint and constraint.fails(dstore=dstore):
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Is entailed if all of the constraints indexed by the upper
bound of selvar are entailed."""
-## for index in self.selvar.get_upper(dstore=dstore):
-## constraint = self.constraints[index]
-## if constraint and not constraint.is_entailed(dstore=dstore):
-## return False
-## return True
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=[]):
-## """Run infer() on all constraints indexed in the lower bound of
selvar,
-## and remove indices from the upper bound of selvar if the indexed
constraint
-## fails."""
-## selvar = self.selvar
-## selupper = selvar.get_upper(dstore=dstore)
-## sellower = selvar.get_lower(dstore=dstore)
-## for index, constraint in enumerate(self.constraints):
-## if not constraint:
-## continue
-## if index in sellower:
-## state, changed = constraint.infer(dstore=dstore)
-## # If any variable changed as a result this, return it
-## if changed:
-## return state, changed
-## elif index in selupper:
-## # A constraint indexed by a value in the upper bound of
selvar failed
-## if constraint.fails(dstore=dstore):
-## # Try to remove this index from the upper bound of
selvar
-## if selvar.discard_upper(index, dstore=dstore,
-## constraint=(verbosity>1 or
selvar in tracevar) and self):
-## return Constraint.sleeping, {selvar}
-##
-## return Constraint.sleeping, set()
-
##class SimplePrecedenceSelection(Constraint):
## """
## Simpler than PrecedenceSelection.
@@ -2697,331 +2574,3 @@
self.constraints = Union(self.variables, problem=self.problem,
weight=self.weight, record=self.record).constraints
self.constraints.extend(Disjoint(self.variables[1:],
problem=self.problem, weight=self.weight, record=self.record).constraints)

-#### Propagators behaving in various weird ways
-##
-##class ReifiedMembership(Constraint):
-## """Constraint that has variables I, S, and J and binds J to 1 if I
is in S, 0 otherwise."""
-##
-## def __init__(self, ivar, svar, truthvar, problem=None, weight=1):
-## Constraint.__init__(self, [ivar, svar, truthvar],
problem=problem,
-## weight=weight)
-## self.ivar = ivar
-## self.svar = svar
-## self.truthvar = truthvar
-## self.member_constraint = IVMemberSV([ivar, svar], problem=None,
propagate=False,
-## weight=weight)
-## self.name = '?{0} c {1}?'.format(self.ivar, self.svar)
-##
-## def fails(self, dstore=None):
-## """Fail if the value of truthvar disagrees with the membership
constraint."""
-## if self.member_constraint.fails(dstore=dstore):
-## if self.truthvar.get_value(dstore=dstore) is 1:
-## return True
-## elif self.member_constraint.is_entailed(dstore=dstore):
-## if self.truthvar.get_value(dstore=dstore) is 0:
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Succeed if the value of the truthvar agrees with the
membership constraint."""
-## if self.member_constraint.fails(dstore=dstore):
-## if self.truthvar.get_value(dstore=dstore) is 0:
-## return True
-## elif self.member_constraint.is_entailed(dstore=dstore):
-## if self.truthvar.get_value(dstore=dstore) is 1:
-## return True
-## return False
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=None):
-## if self.truthvar.determined(dstore=dstore) is not False:
-## if self.truthvar.get_value(dstore=dstore) is 1:
-## # Treat membership constraint in the normal way,
returning the state
-## # and changed variables resulting from inference with it
-## return self.member_constraint.infer(dstore=dstore,
verbosity=verbosity, tracevar=tracevar)
-## else:
-## # Make membership constraint fail
-## changed = set()
-## iv = self.ivar
-## sv = self.svar
-## # Constrain the values of IV to be outside the lower
bound of SV
-## if iv.discard_upper(sv.get_lower(dstore=dstore),
dstore=dstore,
-## constraint=(verbosity>1 or iv in
tracevar) and self):
-## changed.add(iv)
-## # If IV is determined, constrain SV to exclude it
-## if iv.determined(dstore=dstore, verbosity=verbosity) is
not False:
-## if sv.discard_upper(iv.get_domain(dstore=dstore),
dstore=dstore,
-## constraint=(verbosity>1 or sv in
tracevar) and self):
-## changed.add(sv)
-## if verbosity > 1 and changed:
-## print(' Variables {} changed'.format(changed))
-## return Constraint.sleeping, changed
-## elif self.member_constraint.is_entailed(dstore=dstore):
-## # Membership constraint succeeds, so determine truth
variable at 1
-## tv = self.truthvar
-## if tv.determine(1, dstore=dstore,
-## constraint=(verbosity>1 or tv in tracevar)
and self):
-## return Constraint.sleeping, {tv}
-## elif self.member_constraint.fails(dstore=dstore):
-## # Membership constraint fails, so determine truth variable
at 0
-## tv = self.truthvar
-## if tv.determine(0, dstore=dstore,
-## constraint=(verbosity>1 or tv in tracevar)
and self):
-## return Constraint.sleeping, {tv}
-##
-## return Constraint.sleeping, set()
-##
-##class ReifiedInclusion(Constraint):
-## """Constraint that has variables S1, S2, and J and binds J to 1 if
S1 is included in S2 (and S2 is not empty),
-## 0 if S1 is not included in S2, and makes no commitment about J if S1
= S2 = {}."""
-##
-## def __init__(self, svar1, svar2, truthvar, problem=None, weight=1):
-## Constraint.__init__(self, [svar1, svar2, truthvar],
problem=problem,
-## weight=weight)
-## self.svar1 = svar1
-## self.svar2 = svar2
-## self.truthvar = truthvar
-## inclusion_constraint = Inclusion([svar1, svar2], problem=None,
propagate=False,
-## weight=weight)
-## self.inclusion_constraint = inclusion_constraint.constraints[0]
-## self.name = '?{0} c= {1}?'.format(self.svar1, self.svar2)
-##
-## def fails(self, dstore=None):
-## """Fail if the value of truthvar disagrees with the inclusion
constraint."""
-## if self.inclusion_constraint.fails(dstore=dstore):
-## if self.truthvar.get_value(dstore=dstore) is 1:
-## return True
-## elif self.inclusion_constraint.is_entailed(dstore=dstore) and
self.svar2.get_lower(dstore=dstore):
-## # We need to make sure that svar2 is not empty, in which
case we don't commit
-## if self.truthvar.get_value(dstore=dstore) is 0:
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Succeed if the value of the truthvar agrees with the
inclusion constraint."""
-## if self.inclusion_constraint.fails(dstore=dstore):
-## if self.truthvar.get_value(dstore=dstore) is 0:
-## return True
-## elif self.inclusion_constraint.is_entailed(dstore=dstore) and
self.svar2.get_lower(dstore=dstore):
-## # We need to make sure that svar2 is not empty, in which
case we don't commit
-## if self.truthvar.get_value(dstore=dstore) is 1:
-## return True
-## return False
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=None):
-## if self.truthvar.determined(dstore=dstore) is not False:
-## if self.truthvar.get_value(dstore=dstore) is 1:
-## # Treat inclusion constraint in the normal way,
returning the state
-## # and changed variables resulting from inference with it
-## return self.inclusion_constraint.infer(dstore=dstore,
verbosity=verbosity, tracevar=tracevar)
-##
-## elif self.inclusion_constraint.is_entailed(dstore=dstore) and
self.svar2.get_lower(dstore=dstore):
-## # Inclusion constraint succeeds and svar2 is not empty, so
determine truth variable at 1
-## tv = self.truthvar
-## if tv.determine(1, dstore=dstore,
-## constraint=(verbosity>1 or tv in tracevar)
and self):
-## return Constraint.sleeping, {tv}
-##
-## elif self.inclusion_constraint.fails(dstore=dstore):
-## # Inclusion constraint fails, so determine truth variable at
0
-## tv = self.truthvar
-## if tv.determine(0, dstore=dstore,
-## constraint=(verbosity>1 or tv in tracevar)
and self):
-## return Constraint.sleeping, {tv}
-##
-## return Constraint.sleeping, set()
-##
-##class ReifiedEquality(Constraint):
-## """Constraint that has variables S1, S2, and J and binds J to 1 if
S1 is equal to S2,
-## and 0 if S1 is not equal to S2."""
-##
-## def __init__(self, svar1, svar2, truthvar, problem=None,
-## weight=1):
-## Constraint.__init__(self, [svar1, svar2, truthvar],
problem=problem,
-## weight=weight)
-## self.svar1 = svar1
-## self.svar2 = svar2
-## self.truthvar = truthvar
-## equality_constraint = Equality([svar1, svar2], problem=None,
propagate=False,
-## weight=weight)
-## self.eq_constraints = equality_constraint.constraints
-## self.name = '?{0} = {1}?'.format(self.svar1, self.svar2)
-##
-## def fails(self, dstore=None):
-## """Fail if the value of truthvar disagrees with the equality
constraint."""
-## if self.eq_constraints[0].fails(dstore=dstore) or
self.eq_constraints[1].fails(dstore=dstore):
-## if self.truthvar.get_value(dstore=dstore) is 1:
-## return True
-## elif self.eq_constraints[0].is_entailed(dstore=dstore) and
self.eq_constraints[1].is_entailed(dstore=dstore):
-## if self.truthvar.get_value(dstore=dstore) is 0:
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Succeed if the value of the truthvar agrees with the equality
constraint."""
-## if self.eq_constraints[0].fails(dstore=dstore) and
self.eq_constraints[1].fails(dstore=dstore):
-## if self.truthvar.get_value(dstore=dstore) is 0:
-## return True
-## elif self.eq_constraints[0].is_entailed(dstore=dstore) and
self.eq_constraints[1].is_entailed(dstore=dstore):
-## if self.truthvar.get_value(dstore=dstore) is 1:
-## return True
-## return False
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=None):
-## if self.truthvar.determined(dstore=dstore) is not False:
-## if self.truthvar.get_value(dstore=dstore) is 1:
-## # Treat equality constraints in the normal way,
returning the state
-## # and changed variables resulting from inference with
them
-## state0, changed0 =
self.eq_constraints[0].infer(dstore=dstore, verbosity=verbosity,
tracevar=tracevar)
-## state1, changed1 =
self.eq_constraints[1].infer(dstore=dstore, verbosity=verbosity,
tracevar=tracevar)
-## if state0 == state1:
-## state = state0
-## elif state0 == Constraint.failed or state1 ==
Constraint.failed:
-## state = Constraint.failed
-## else:
-## state = Constraint.sleeping
-## changed = changed0 | changed1
-## return state, changed
-## else:
-## # Make inclusion constraint fail or have both sets empty
-## # How??
-## pass
-##
-## elif self.eq_constraints[0].is_entailed(dstore=dstore) and
self.eq_constraints[1].is_entailed(dstore=dstore):
-## # Equality constraint succeeds, so determine truth variable
at 1
-## tv = self.truthvar
-## if tv.determine(1, dstore=dstore,
-## constraint=(verbosity>1 or tv in tracevar)
and self):
-## return Constraint.sleeping, {tv}
-##
-## elif self.eq_constraints[0].fails(dstore=dstore) and
self.eq_constraints[1].fails(dstore=dstore):
-## # Equality constraint fails, so determine truth variable at 0
-## tv = self.truthvar
-## if tv.determine(0, dstore=dstore,
-## constraint=(verbosity>1 or tv in tracevar)
and self):
-## return Constraint.sleeping, {tv}
-##
-## return Constraint.sleeping, set()
-##
-##class LogConstraint(Constraint):
-## """'Logical' propagators: implication, equivalence (maybe
others?)."""
-##
-## def make_true(self, v, dstore=None, verbosity=0, tracevar=None):
-## if isinstance(v, IVar):
-## # Remove 0 from v's domain
-## if v.discard_value(0, dstore=dstore,
-## constraint=(verbosity>1 or v in
tracevar) and self):
-## return True
-## else:
-## # Make sure v has at least element in it
-## if v.strengthen_lower_card(1, dstore=dstore,
-## constraint=(verbosity>1 or v in
tracevar) and self):
-## return True
-## return False
-##
-## def make_false(self, v, dstore=None, verbosity=0, tracevar=None):
-## if isinstance(v, IVar):
-## # Determine v at 0
-## if v.determine(0, dstore=dstore,
-## constraint=(verbosity>1 or v in tracevar)
and self):
-## return True
-## else:
-## # Determine v at set()
-## if v.determine(set(), dstore=dstore,
-## constraint=(verbosity>1 or v in tracevar)
and self):
-## return True
-## return False
-##
-##class LogEquivalence(LogConstraint):
-## """Two set or integer variables; true if both or neither are
non-empty or non-zero."""
-##
-## def __init__(self, vars, problem=None, weight=1):
-## Constraint.__init__(self, vars, problem=problem, weight=weight)
-## self.name = '{0} <-> {1}'.format(self.variables[0],
self.variables[1])
-##
-## def fails(self, dstore=None):
-## """Fail if one variable is 'true' and the other isn't."""
-## v0 = self.variables[0]
-## v1 = self.variables[1]
-## if v0.is_true(dstore=dstore) and v1.is_false(dstore=dstore):
-## return True
-## elif v0.is_false(dstore=dstore) and v1.is_true(dstore=dstore):
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Succeed if both variables are 'true' or 'false'."""
-## v0 = self.variables[0]
-## v1 = self.variables[1]
-## if v0.is_true(dstore=dstore) and v1.is_true(dstore=dstore):
-## return True
-## elif v0.is_false(dstore=dstore) and v1.is_false(dstore=dstore):
-## return True
-## return False
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=None):
-## changed = set()
-## v0 = self.variables[0]
-## v1 = self.variables[1]
-##
-## if v0.is_true(dstore=dstore):
-## if self.make_true(v1, dstore=dstore, verbosity=verbosity,
tracevar=tracevar):
-## return Constraint.sleeping, {v1}
-## elif v0.is_false(dstore=dstore):
-## if self.make_false(v1, dstore=dstore, verbosity=verbosity,
tracevar=tracevar):
-## # Both are determined
-## return Constraint.entailed, {v1}
-##
-## if v1.is_true(dstore=dstore):
-## if self.make_true(v0, dstore=dstore, verbosity=verbosity,
tracevar=tracevar):
-## return Constraint.sleeping, {v0}
-## elif v1.is_false(dstore=dstore):
-## if self.make_false(v0, dstore=dstore, verbosity=verbosity,
tracevar=tracevar):
-## # Both are determined
-## return Constraint.entailed, {v0}
-##
-## return Constraint.sleeping, set()
-##
-##class LogImplication(LogConstraint):
-## """Two set or integer variables:
-## v0 -> v1
-## false if v0 is non-zero or non-empty and v1 is zero or empty; true
otherwise."""
-##
-## def __init__(self, vars, problem=None, weight=1):
-## Constraint.__init__(self, vars, problem=problem, weight=weight)
-## self.name = '{0} -> {1}'.format(self.variables[0],
self.variables[1])
-##
-## def fails(self, dstore=None):
-## """Fail if v0 is 'true' and v1 is 'false'."""
-## v0 = self.variables[0]
-## v1 = self.variables[1]
-## if v0.is_true(dstore=dstore) and v1.is_false(dstore=dstore):
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Succeed unless v0 is 'true' and v1 is 'false'."""
-## v0 = self.variables[0]
-## v1 = self.variables[1]
-## if v0.is_false(dstore=dstore):
-## return True
-## if v0.is_true(dstore=dstore) and v1.is_true(dstore=dstore):
-## return True
-## return False
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=None):
-## changed = set()
-## v0 = self.variables[0]
-## v1 = self.variables[1]
-##
-## # if v0 is true, v1 must be
-## if v0.is_true(dstore=dstore):
-## if self.make_true(v1, dstore=dstore, verbosity=verbosity,
tracevar=tracevar):
-## return Constraint.sleeping, {v1}
-##
-## # if v1 is false, v0 must be
-## if v1.is_false(dstore=dstore):
-## if self.make_false(v0, dstore=dstore, verbosity=verbosity,
tracevar=tracevar):
-## # Both are determined
-## return Constraint.entailed, {v0}
-##
-## return Constraint.sleeping, set()
=======================================
--- /hiiktuu/cs.py Wed May 14 07:28:54 2014 UTC
+++ /hiiktuu/cs.py Fri May 16 04:02:37 2014 UTC
@@ -29,9 +29,11 @@
# -- Created
# 2014.05.11
# -- SearchState class created, so that Solver doesn't have to do
double-duty.
+# 2014.05.15
+# -- Search implemented in Solver.

from .constraint import *
-import queue
+import queue, random

class Solver:
"""A solver for a constraint satisfaction problem, actually a state in
the search space."""
@@ -47,14 +49,10 @@
def __init__(self, constraints, dstore, name='',
description='', verbosity=0):
self.constraints = constraints
- self.dstore = dstore
# Used in solver's printname
self.description = description
# Solver (state) that generated this one
self.verbosity=verbosity
- self.entailed = []
- self.failed = []
- self.status = Solver.running
self.id = Solver.id
self.name = name or "({})={}=".format(description, self.id)
self.init_state = SearchState(solver=self, dstore=dstore,
@@ -65,118 +63,50 @@
def __repr__(self):
return "Solver{}".format(self.name)

- def exit(self, result, verbosity=0):
- if result == Constraint.failed:
- return True
- else:
- return self.fixed_point(result, verbosity=verbosity)
+ def generator(self, cutoff=100, initial=None,
+ test_verbosity=False, expand_verbosity=False,
+ tracevar=None):
+ '''A generator for solutions. Uses best-first search.'''
+ tracevar = tracevar or []
+ fringe = queue.PriorityQueue()
+ init_state = initial or self.init_state
+ fringe.put((init_state.get_value(), init_state))
+ n = 0
+ solutions = []
+ ambiguity = False
+ while not fringe.empty() and n < cutoff:
+ if n > 0 and not ambiguity:
+ print("Ambiguity: expanding from best state")
+ ambiguity = True
+ if (n+1) % 50 == 0 or test_verbosity or expand_verbosity:
+ if test_verbosity or expand_verbosity:
+ print()
+ print('>>>> SEARCH STATE {} <<<<'.format(n+1))
+ if n >= cutoff:
+ print('STOPPING AT CUTOFF')
+ priority, state = fringe.get()
+ # Goal test for this state
+ state.run(verbosity=test_verbosity, tracevar=tracevar)
+ if state.status == SearchState.succeeded:
+ # Return this state
+ yield state
+ # Expand to next states if distributable
+ if state.status == SearchState.distributable:
+ for attribs, next_state in self.distribute(state=state,
verbosity=expand_verbosity):
+ val = next_state.get_value()
+ # Add next state where it belongs in the queue
+ fringe.put((val, next_state))
+ n += 1
+ if test_verbosity or expand_verbosity:
+ print()
+ print('>>>> HALTED AT SEARCH STATE', n, '<<<<')

- def fixed_point(self, awaken, verbosity=0):
- if verbosity:
- s = "# constraints to awaken: {}, # variables to determine: {}|
{}"
- print(s.format(len(awaken), len(self.dstore.ess_undet),
len(self.dstore.undetermined)))
- if self.dstore.is_determined():
- # All essential variables are determined
- self.status = Solver.succeeded
- return True
- elif len(awaken) == 0:
-# # No more constraints are awake
-# if self.dstore.is_determined():
-# # All variables are determined in the dstore or
peripheral: success
-# self.status = Solver.succeeded
-# else:
- # No more constraints apply: continue search
- # More variables to determine; we have to distribute
- self.status = Solver.distributable
- return True
- # Keep propagating
- return False
-
- def run(self, verbosity=0, tracevar=[]):
- """Run the constraints until CS fails or a fixed point is
reached."""
- if verbosity:
- s = "Running {} with {}|{} undetermined variables, {}
constraints"
- print(s.format(self, len(self.dstore.ess_undet),
len(self.dstore.undetermined), len(self.constraints)))
- awaken = set(self.constraints)
- it = 0
- while not self.exit(awaken, verbosity=verbosity):
- if verbosity:
- print("Running iteration {}".format(it))
- awaken = self.run_constraints(awaken, verbosity=verbosity,
tracevar=tracevar)
- it += 1
-
- def run_constraints(self, constraints, verbosity=0, tracevar=[]):
- awaken = set()
- all_changed = set()
- for constraint in constraints:
-# print(" Running {}".format(constraint))
- state, changed_vars = constraint.run(dstore=self.dstore,
verbosity=verbosity, tracevar=tracevar)
- all_changed.update(changed_vars)
- if state == Constraint.entailed:
-# print(" Entailed")
- # Constraint is entailed; add it to the list of those.
- self.entailed.append(constraint)
- # Delete it from awaken if it's already there
- if constraint in awaken:
- awaken.remove(constraint)
-
- if state == Constraint.failed:
- if verbosity:
- print("FAILED {}".format(constraint))
- return Constraint.failed
-# # constraint fails; remove it from the entailed or awaken
lists if it's there
-# if constraint in self.entailed:
-# self.entailed.remove(constraint)
-# if constraint in awaken:
-# awaken.remove(constraint)
-## # penalize the CSpace
-## self.penalty += constraint.weight
-# # and remember that it failed
-# self.failed.append(constraint)
-
- # Check whether any of the changed vars cannot possibly be
determined; if so,
- # the constraint fails
-# if state != Constraint.failed:
- for var in changed_vars:
- try:
- var.determined(dstore=self.dstore, verbosity=verbosity)
- except VarError:
- if verbosity:
- print("{} CAN'T BE DETERMINED, SO {} MUST
FAIL".format(var, constraint))
- return Constraint.failed
-# state = Constraint.failed
-# break
-
-# if self.penalty > self.max_penalty:
-# # CSpace fails without running other constraints
-# if verbosity:
-# print('PENALTY {} EXCEEDS MAXIMUM
{}!'.format(self.penalty, self.max_penalty))
-# self.status = CSpace.failed
-# return CSpace.failed
-
- # If the constraint succeeds, add the constraints of its
variables to awaken
-# if state not in [Constraint.failed]:
-# print(" Awakening, # changed vars
{}".format(len(changed_vars)))
- for var in changed_vars:
- # Add constraints for changed var to awaken unless those
constraints are already entailed
- # or failed
- update_cons = {c for c in var.constraints if c not in
self.entailed and c not in self.failed}
-# print(" Awakening {} constraints for
{}".format(len(update_cons), var))
- if var == tracevar and verbosity:
- print('Adding {} constraints for changed variable
{}'.format(len(update_cons), tracevar))
- awaken.update(update_cons)
-# print('update cons {}'.format(update_cons))
- if verbosity > 1:
- print('# changed vars {}'.format(len(all_changed)))
- return awaken
-
- def select_variable(self, vars, verbosity=0):
+ def select_variable(self, variables, dstore=None, verbosity=0):
"""One possibility for selecting variables to branch on:
prefer larger upper domains."""
- vars = self.dstore
- return sorted(vars, key=lambda v:
len(v.get_upper(dstore=self.dstore)))[0]
+ return sorted(variables, key=lambda v:
len(v.get_upper(dstore=dstore)))[0]

- def split_var_values(self, variable, verbosity=0):
+ def split_var_values(self, variable, dstore=None, verbosity=0):
"""For a selected variable, select a value by calling the value
selection function,
and return two sets of values: the selected value and the other
values. Assumes
variable is a set or int variable."""
@@ -191,7 +121,8 @@

@staticmethod
def ran_select(values):
- return random.choice(values)
+ """Randomly select a value from a set of values."""
+ return random.choice(list(values))

@staticmethod
def smallest_select(values):
@@ -200,14 +131,13 @@
value_list.sort()
return value_list[0]

- def select_constraints(self, variable, verbosity=0):
+ def select_constraints(self, variable, dstore=None, verbosity=0):
"""Return a pair of constraints for the selected variable."""
subset1, subset2 = self.split_var_values(variable,
verbosity=verbosity)
if isinstance(variable, IVar):
if verbosity:
print(' values: {}, {}'.format(subset1, subset2))
- return Member(variable, subset1, problem=self.problem), \
- Member(variable, subset2, problem=self.problem)
+ return Member(variable, subset1, record=False),
Member(variable, subset2, record=False)
else:
# For an set Var, add subset1 to the lower bound, subtract
subset1
# from the upper bound
@@ -215,25 +145,26 @@
v2 = variable.get_upper(dstore=dstore) - subset1
if verbosity:
print(' values: {}, {}'.format(subset1, subset2))
- return Superset(variable, v1, problem=self.problem), \
- Subset(variable, v2, problem=self.problem)
+ return Superset(variable, v1, record=False), Subset(variable,
v2, record=False)

- def distribute(self, state, project=False, verbosity=0):
+ def distribute(self, state=None, verbosity=0):
"""Creates and returns two new states by cloning the dstore with
the distributor."""
- if self.status != Solver.distributable:
- return []
+# if self.status != SearchState.distributable:
+# return []
+ state = state or self.init_state
undet = state.dstore.ess_undet
if verbosity:
ndet = len(undet)
print('DISTRIBUTION')
- print('Distributing, undetermined vars {}'.format(ndet))
+ print('Distributing from state {}, undetermined vars
{}'.format(state, ndet))
for v in list(undet)[:5]:
v.pprint(dstore=state.dstore)
if ndet > 5:
print('...')
# Select a variable and two disjoint basic constraints on it
- var = self.select_variable(undet, verbosity=verbosity)
- constraint1, constraint2 = self.select_constraints(var,
verbosity=verbosity)
+ var = self.select_variable(undet, dstore=state.dstore,
verbosity=verbosity)
+ constraint1, constraint2 = self.select_constraints(var,
dstore=state.dstore,
+
verbosity=verbosity)
if verbosity:
print('Distribution constraints: a -- {}, b --
{}'.format(constraint1, constraint2))
# The constraints of the selected variable (make copies)
@@ -242,11 +173,11 @@
new_dstore1 = state.dstore.clone(constraint1, name=self.name+'a')
new_dstore2 = state.dstore.clone(constraint2, name=self.name+'b')
# Create a new SearchState for each dstore, preserving the accumulated
penalty
- state1 = SearchState(constraints=state.constraints,
dstore=new_dstore1,
+ state1 = SearchState(constraints=constraints, dstore=new_dstore1,
name=state.name+'a', depth=state.depth+1,
parent=state,
verbosity=verbosity)
- state2 = SearchState(constraints=state.constraints,
dstore=new_dstore2,
+ state2 = SearchState(constraints=constraints, dstore=new_dstore2,
name=state.name+'b', depth=state.depth+1,
parent=state,
verbosity=verbosity)
@@ -255,32 +186,52 @@

class SearchState:

+ running = 0
+ succeeded = 1
+ failed = 2
+ distributable = 3
+ skipped = 4
+
def __init__(self, solver=None, name='', dstore=None,
constraints=None, parent=None,
depth=0, verbosity=0):
self.solver = solver
self.name = name
self.dstore = dstore
+ self.entailed = []
+ self.failed = []
self.constraints = constraints
self.parent = parent
self.children = []
self.depth = depth
+ self.status = SearchState.running
self.verbosity = verbosity

def __repr__(self):
return "<SS {}/{}>".format(self.name, self.depth)

+ def get_value(self):
+ """A measure of how promising this state is: how many undetermined
+ essential variables there are."""
+ return len(self.dstore.ess_undet)
+
+ def exit(self, result, verbosity=0):
+ if result == Constraint.failed:
+ return True
+ else:
+ return self.fixed_point(result, verbosity=verbosity)
+
def fixed_point(self, awaken, verbosity=0):
if verbosity:
s = "# constraints to awaken: {}, # variables to determine: {}|
{}"
print(s.format(len(awaken), len(self.dstore.ess_undet),
len(self.dstore.undetermined)))
if self.dstore.is_determined():
# All essential variables are determined
- self.status = Solver.succeeded
+ self.status = SearchState.succeeded
return True
elif len(awaken) == 0:
# More variables to determine; we have to distribute
- self.status = Solver.distributable
+ self.status = SearchState.distributable
return True
# Keep propagating
return False
=======================================
--- /hiiktuu/sentence.py Thu May 15 18:53:03 2014 UTC
+++ /hiiktuu/sentence.py Fri May 16 04:02:37 2014 UTC
@@ -53,6 +53,9 @@
# -- Tree variables for unselected groups get removed from essential
# variable list so the list of undetermined essential variables can
# end up empty when it should be.
+# 2014.05.15
+# -- Fixed how group trees are worked out: using the snode->gnodes
variables
+# rather than merger-related variables and tree variables

import itertools, copy
# ui.py loads language, etc.
@@ -112,23 +115,22 @@
else:
return '|| {} sentence {} ||'.format(self.language, self.id)

- def do(self, verbosity=0):
- """If target language, translate. If not, parse."""
-# if verbosity:
- if self.target:
- print("Attempting to translate sentence {}".format(self))
- else:
- print("Attempting to parse sentence {}".format(self))
- if self.initialize(verbosity=verbosity):
- if self.solve(verbosity=verbosity):
- if self.target:
- for solution in self.solutions:
- solution.translate(verbosity=verbosity)
- print("Translations found:")
- for translation in solution.translations:
- translation.display()
- else:
- print("No solution found")
+# def do(self, verbosity=0):
+# """If target language, translate. If not, parse."""
+# if self.target:
+# print("Attempting to translate sentence {}".format(self))
+# else:
+# print("Attempting to parse sentence {}".format(self))
+# if self.initialize(verbosity=verbosity):
+# if self.solve(verbosity=verbosity):
+# if self.target:
+# for solution in self.solutions:
+# solution.translate(verbosity=verbosity)
+# print("Translations found:")
+# for translation in solution.translations:
+# translation.display()
+# else:
+# print("No solution found")

def parse(self, verbosity=0):
print("Attempting to parse {}".format(self))
@@ -150,19 +152,21 @@
return True

def solve(self, verbosity=0):
- """Run constraints and create a single solution."""
- if verbosity:
- print("Attempting to find solutions for {}".format(self))
- self.run(verbosity=verbosity)
- if self.solver.status == Solver.succeeded:
- self.create_solution(verbosity=verbosity)
- if verbosity:
- print("Found solution {}".format(self.solutions[0]))
- return True
- else:
- if verbosity:
- print("No solution found")
- return False
+ """Generate solutions and translations."""
+ generator = self.solver.generator(test_verbosity=verbosity,
+ expand_verbosity=verbosity)
+ try:
+ proceed = True
+ while proceed:
+ succeeding_state = next(generator)
+ solution =
self.create_solution(dstore=succeeding_state.dstore, verbosity=verbosity)
+ if verbosity:
+ print('FOUND ANALYSIS', solution)
+ solution.translate(verbosity=verbosity)
+ if not input('SEARCH FOR ANOTHER ANALYSIS? [yes/NO] '):
+ proceed = False
+ except StopIteration:
+ print('No more solutions')

def tokenize(self, verbosity=0):
"""Segment the sentence string into tokens, analyze them
morphologically,
@@ -467,24 +471,18 @@

featvars=[sn.variables['features'] for sn in self.nodes],

selvars=[g.variables.get('agr', EMPTY) for g in self.groups]))

- def run(self, verbosity=0):
- """Run constraint satisfaction on constraints, for now without
search if
- no solution is found."""
- self.solver.run(verbosity=verbosity)
- if verbosity:
- print("Solver status after run: {}".format(self.solver.status))
-# if self.solver.status == Solver.succeeded:
-# # All essential variables are determined; one solution found
-# return True
-# else:
-# # Try to see if the really essential variables are in fact
determined
-# groups = self.variables['groups'].get_value(dstore=dstore)
-# ginsts = [self.groups[g] for g in groups]
-# trees = [list(g.variables['tree'].get_value(dstore=dstore))
for g in ginsts]
- return self.solver.status
+# def run(self, verbosity=0):
+# """Run constraint satisfaction on constraints, for now without
search if
+# no solution is found."""
+# self.solver.run(verbosity=verbosity)
+# if verbosity:
+# print("Solver status after run:
{}".format(self.solver.status))
+# return self.solver.status

@staticmethod
def make_tree(group_dict, group_i, tree):
+ """Make a tree (a set of snode indices) for the group with index
group_i
+ by searching for merged groups and their trees in group_dict."""
if not group_dict[group_i][1]:
return
else:
@@ -534,8 +532,9 @@
trees = [x[1][2] for x in trees]
# trees = [list(g.variables['tree'].get_value(dstore=dstore)) for g
in ginsts]
# Get the indices of the GNodes for each SNode
- self.solutions.append(Solution(self, ginsts, s2gnodes,
- len(self.solutions), trees=trees))
+ solution = Solution(self, ginsts, s2gnodes, len(self.solutions),
trees=trees)
+ self.solutions.append(solution)
+ return solution

class SNode:
"""Sentence token and its associated analyses and variables."""
@@ -750,7 +749,7 @@
ngroups = len(self.sentence.groups)
nsnodes = len(self.sentence.nodes)
cand_snodes = self.sentence.covered_indices
- print("Creating variables for {}, # abs nodes {}".format(self,
self.nanodes))
+# print("Creating variables for {}, # abs nodes {}".format(self,
self.nanodes))
# GNode indices for this GInst (determined)
self.variables['gnodes'] =
DetVar('g{}->gnodes'.format(self.index), {gn.sent_index for gn in
self.nodes})
# Abstract GNode indices for GInst (determined)
@@ -978,7 +977,7 @@
ginst.set_translations(verbosity=verbosity)
self.make_translations(verbosity=verbosity)

- def make_translations(self, verbosity=0):
+ def make_translations(self, verbosity=0, display=True):
"""Combine GInsts for each translation in translation products, and
separate gnodes into a dict for each translation."""
if verbosity:
@@ -987,8 +986,13 @@
for index, translation in enumerate(translations):
t = Translation(self, translation, index,
trees=copy.deepcopy(self.trees), verbosity=verbosity)
t.initialize(verbosity=verbosity)
- t.realize(verbosity=verbosity)
+ t.realize(verbosity=verbosity, display=display)
+# if display:
+# t.display_all()
self.translations.append(t)
+ if not input('SEARCH FOR ANOTHER TRANSLATION FOR THIS
ANALYSIS? [yes/NO] '):
+ return
+ print("No more translations for analysis")

def merge_nodes(self, verbosity=0):
"""Merge the source features of cat and inst GNodes associated
with each SNode."""
@@ -1011,8 +1015,9 @@
self.snodes.append((gnodes, features))

class Translation:
- """Representation of a single translation for an input sentence.
- Multiple translations are possible with a single Solution."""
+ """Representation of a single translation for an input sentence (with
+ multiple possible orders and morphological realizations of individual
+ words). Multiple translations are possible with a single Solution."""

def __init__(self, solution, attribs, index, trees=None, verbosity=0):
self.solution = solution
@@ -1048,19 +1053,23 @@
self.solver = Solver(self.constraints, self.dstore,
description='target realization',
verbosity=verbosity)
- # Final output
- self.output = None
+ # Final output; different ones have alternate word orders
+ self.outputs = []

def __repr__(self):
return "{}[{}] ->".format(self.solution, self.index)

- def display(self):
- print("{} {}".format(self, self.out_string()))
+ def display(self, index):
+ print("{} {}".format(self, self.out_string(index)))

- def out_string(self):
+ def display_all(self):
+ for index in range(len(self.outputs)):
+ self.display(index)
+
+ def out_string(self, index):
'''Convert output to a string for pretty printing.'''
l = []
- for word_list in self.output:
+ for word_list in self.outputs[index]:
if len(word_list) == 1:
l.append(word_list[0])
else:
@@ -1322,21 +1331,25 @@
# for c in self.constraints:
# print(c)

- def realize(self, verbosity=0):
+ def realize(self, verbosity=0, display=True):
"""Run constraint satisfaction on the order and disjunction
constraints,
and convert variable values to sentence positions."""
- if verbosity:
- print("Realizing {}".format(self))
- # Run constraint satisfaction on the constraints until all
variables are
- # determined or nothing happes to the constraints
- self.solver.run(verbosity=verbosity)
- if verbosity:
- print("Solver status after run: {}".format(self.solver.status))
- if self.solver.status == Solver.succeeded:
- # No more awake constraints and no undetermined variables; get
positions from
- # variable values
- order_vars = self.variables['order']
- positions = [list(v.get_value(dstore=self.dstore))[0] for v in
order_vars]
- node_pos = list(zip([n[0] for n in self.nodes], positions))
- node_pos.sort(key=lambda x: x[1])
- self.output = [n[0] for n in node_pos]
+ generator = self.solver.generator(test_verbosity=verbosity,
+ expand_verbosity=verbosity)
+ try:
+ proceed = True
+ while proceed:
+ succeeding_state = next(generator)
+ order_vars = self.variables['order']
+ positions =
[list(v.get_value(dstore=succeeding_state.dstore))[0] for v in order_vars]
+ node_pos = list(zip([n[0] for n in self.nodes], positions))
+ node_pos.sort(key=lambda x: x[1])
+ self.outputs.append([n[0] for n in node_pos])
+ if display:
+ self.display(len(self.outputs)-1)
+ if verbosity:
+ print('FOUND REALIZATION {}'.format(self.output[-1]))
+ if not input('SEARCH FOR ANOTHER REALIZATION FOR
TRANSLATION? [yes/NO] '):
+ proceed = False
+ except StopIteration:
+ print('No more realizations for translation')
=======================================
--- /l3xdg/solver.py Wed May 14 07:28:54 2014 UTC
+++ /l3xdg/solver.py Fri May 16 04:02:37 2014 UTC
@@ -154,8 +154,8 @@
raise NotImplementedError("{} is an abstract
class".format(self.__class__.__name__))

def goal_test(self, state, verbosity=0, tracevar=None):
- state.run(verbosity=verbosity, tracevar=tracevar,
- project=self.project)
+ state.run(verbosity=verbosity, tracevar=tracevar)
+# project=self.project)
return state.status == CSpace.succeeded

def successor(self, state, verbosity=0):
=======================================
--- /paperdrafts/lglp/acl.bst Sat May 10 06:08:38 2014 UTC
+++ /paperdrafts/lglp/acl.bst Fri May 16 04:02:37 2014 UTC
@@ -1010,7 +1010,7 @@
{ numnames #2 =
{ s #1 "{vv }{ll }and " format.name$ s #2 "{vv }{ll}" format.name$
*
}
- { s #1 "{vv }{ll }\bgroup et al.\egroup " format.name$ }
+ { s #1 "{vv }{ll }\bgroup et al.\egroup" format.name$ }
if$
}
if$
=======================================
--- /paperdrafts/lglp/lglp14.pdf Thu May 15 21:03:43 2014 UTC
+++ /paperdrafts/lglp/lglp14.pdf Fri May 16 04:02:37 2014 UTC
Binary file, no diff available.
=======================================
--- /paperdrafts/lglp/lglp14.tex Thu May 15 21:03:43 2014 UTC
+++ /paperdrafts/lglp/lglp14.tex Fri May 16 04:02:37 2014 UTC
@@ -68,25 +68,16 @@
\begin{abstract}
This paper introduces Hiiktuu,
an ongoing project to develop a framework and a set
-of tools for the creation of simple bilingual dependency grammars for
machine translation
-and computer-assisted translation into and out of under-resourced
languages.
+of tools for the creation of rudimentary bilingual lexicon-grammars for
machine translation
+into and out of under-resourced languages.
The basic units in Hiiktuu, called \textbf{groups}, are headed multi-item
sequences.
-%% AJR: Could this be reworded? Or maybe something like "which correspond
to
-% \textit{catenae} from the dependency grammar literature" ...
-% MG: Think I'll leave it out of the abstract.
-% technically corresponding to catenae in dependency trees.
-In their simplest form, group positions consist of wordforms.
-More abstract groups, generalizing across multiple sequences of specific
word forms,
-contain lexemes, syntactic categories, and grammatical features.
-Translation occurs in separate analysis, transfer, and realization phases.
-Analysis consists in the selection of a set of groups that maximally cover
-the words in the input sentence.
-Transfer consists in activating a set of target-language groups associated
with
-the source-language groups and selecting a set of features for
target-language words
-that are constrained to agree with source-language features within the
group.
-Realization consists in the generation of target-language wordforms and the
-sequencing of the forms according to order constraints in target-language
groups.
-Both analysis and realization are implemented through constraint
satisfaction.
+In addition to wordforms, groups may contain lexemes, syntactic-semantic
categories,
+and grammatical features.
+Each group is associated with one or more translations, each of which is a
group in a target
+language.
+During translation, constraint satisfaction is used to select a set of
source-language groups
+for the input sentence and to sequence the words in the associated
target-language
+groups.

\end{abstract}

@@ -123,8 +114,8 @@
\hspace{-0.65cm} % space normally used by the marker
This work is licenced under a Creative Commons
Attribution 4.0 International License.
- Page numbers and proceedings footer are added by
- the organizers.
+% Page numbers and proceedings footer are added by
+% the organizers.
License details:
\url{http://creativecommons.org/licenses/by/4.0/}
}
@@ -137,7 +128,7 @@
millions of speakers, such as Telugu, Burmese, Oromo, and Hausa.
For machine translation (MT) and computer-assisted translation (CAT),
the lack is even more serious because what is
-required is bitext, sentence-aligned translations between the language
+required for machine learning is bitext, sentence-aligned translations
between the language
in question and another language.

For these reasons, work on many such languages will continue to
@@ -147,10 +138,8 @@
notoriously time-consuming, there is a need for tools to permit
researchers and language technology users to ``get off the ground''
with these languages, that is, to create rudimentary grammars and lexica
that
-will permit some basic applications, and, in the case of endangered
languages,
-facilitate the documentation process.
+will support some basic applications and facilitate the documentation
process.

-We are particularly interested in MT and CAT and the grammars and lexica
that they require.
We focus on MT and CAT because for most of the languages in question, the
lack of
linguistic resources correlates with a lack of written material in the
language, and
we would like to develop tools to aid human translators, including
non-professional ones,
@@ -173,22 +162,21 @@
The idea of treating phrases rather than individual words as the basic
units of a language
goes back at least to the proposal of a Phrasal Lexicon by Becker
\shortcite{becker}.
In recent years, the idea has gained currency within the related
frameworks of Construction Grammar \cite{steels}
-and Frame Semantics \cite{fillmoreFS} as well as in phrase-based
statistical machine translation, which
+and Frame Semantics \cite{fillmoreFS} as well as in phrase-based
statistical machine translation (PBSMT), which
in one form or another now dominates the MT field.
Arguments in favor of phrasal units are often framed in terms of the
ubiquity of idiomaticity, that is, departure
to one degree or another from strict compositionality.
-
Seen another way, phrasal units address the ubiquity of lexical ambiguity.
If a verb's interpretation depends on its object or subject, then it may
make more sense to treat the combination
of the verb and particular objects or subjects as units in their own right.

-These related arguments based on idiomaticity and ambiguity are semantic;
they point out the advantages of
-phrasal units in system that are concerned with meaning.
+Arguments based on idiomaticity and ambiguity are semantic; they point out
the advantages of
+phrasal units in systems that deal with meaning.
But the arguments extend naturally to translation.
-If the meaning of a phrase in the source language fails to be the strict
combination of the meanings
-of the individual words in the phrase, then it is unlikely that the
translation of the phrase will be the
-combination of the translations of the words in the phrase.
-If a noun or verb has multiple translations, then adding lexical context
to the noun or verb could permit an MT
+If the meaning of a source-language phrase fails to be the strict
combination of the meanings
+of the words in the phrase, then it is unlikely that the translation of
the phrase will be the
+combination of the translations of the words.
+Adding lexical context to an ambiguous noun or verb could permit an MT
system to select the appropriate translation.

\subsection{A simple phrasal lexicon}
@@ -198,14 +186,14 @@
Each group represents a catena \cite{osborneetal12}.
Catenae go beyond constituents (phrases), including all combinations of
elements that are continuous
in the vertical dimension within a dependency tree.
-For example, in the sentence \textit{I gave her a piece of my mind},
\textit{I, gave} and \textit{gave, her, piece}
+For example, in the sentence \textit{I gave her a piece of my mind},
\{\textit{I, gave}\} and \{\textit{gave, her, piece}\}
are among the catenae but not the constituents of the sentence.

A catena has a head, and each Hiiktuu group must also have a head, the
main function of which is to
index the group within the lexicon.
A group's entry may also specify translations to groups in one or more
other languages.
For each translation, the group's entry gives an \textbf{alignment}
between the groups, representing correspondences between
-group elements, as in the phrase tables of phrase-based SMT.
+group elements, as in the phrase tables of PBSMT.
Entry~\ref{entry:end} shows a simple group entry of this sort.
The English group \textit{the end of the world} with head \textit{end} has
as its Spanish translation
the group \textit{el fin del mundo} (which must have an entry in the
Spanish lexicon).
@@ -218,9 +206,8 @@
\begin{verbatim}
end:
- words: [the, end, of, the, world]
- trans:
- spa:
- - [el_fin_del_mundo, {align: [1,2,3,0,4]}]
+ spa:
+ - [el_fin_del_mundo, {align: [1,2,3,0,4]}]
\end{verbatim}
\normalsize
%\end{spacing}
@@ -317,10 +304,9 @@
\begin{verbatim}
pass_v:
- words: [pass_v, the, buck]
- trans:
- spa:
- - [escurrir_el_bulto,
- {align: [1,2,3], agr: [{tns: tmp, prs: prs, num: num}, 0, 0]}]
+ spa:
+ - [escurrir_el_bulto,
+ {align: [1,2,3], agr: [{tns: tmp, prs: prs, num: num}, 0, 0]}]
\end{verbatim}
\normalsize
%\end{spacing}
@@ -348,24 +334,21 @@
groups:
give_v:
- words: [give_v, $sbd, a, piece, of, $sbds, mind]
- trans:
- spa:
- - [cantar_$algn_las_cuarenta,
- {align: [1,2,3,4,0,0,0],
- agr: [{tns: tmp, prs: prs, num: num}, 0,0,0,0,0,0]}]
+ agr: [[2, 6, {prs: prs, num: num}]]
+ spa:
+ - [cantar_a_$algn_las_cuarenta,
+ {align: [1,3,4,5,0,0,0],
+ agr: [{tns: tmp, prs: prs, num: num}, 0,0,0,0,0,0]}]
my:
- words: [my]
- trans:
- spa:
- - [mi]
- - [mis]
+ spa:
+ - [mi]
+ - [mis]
mayor:
- words: [the, mayor]
forms:
- my:
- - cats: [$sbds]
- mayor:
- - cats: [$sbd]
+ my: [{cats: [$sbds]}]
+ mayor: [{cats: [$sbd]}]
\end{verbatim}
\normalsize
%\end{spacing}
@@ -443,7 +426,9 @@
the transfer phase.
In the current version of the system, this is accomplished through a
dictionary that maps
lexemes and feature sets to surface forms.
-In a future version, it will be possible to call a dedicated morphological
generator at this stage.
+\footnote{In future versions
+of the system, it will be possible to call a morphological generator at
+this stage.}
Finally, target-language words are sequenced in a way that satisfies
word-order
conditions in target-language groups.
The sequencing process is implemented with constraint satisfaction.
@@ -500,7 +485,7 @@

Hiiktuu is written in Python;
the code and implemented lexical/grammatical examples
-are available at [URL omitted from submission to preserve anonymity]
+are available at [\textit{URL omitted from submission to preserve
anonymity}]
under the GPL license.
To date, we have only tested the framework on a limited number of
Amharic-to-Oromo
translations.
=======================================
--- /paperdrafts/lglp/mind.png Sat May 10 06:08:38 2014 UTC
+++ /paperdrafts/lglp/mind.png Fri May 16 04:02:37 2014 UTC
Binary file, no diff available.
Reply all
Reply to author
Forward
0 new messages