[hltdi-l3] push by onlysk...@gmail.com - l3lite changed to hiiktuu. Agreement variables and constraints to hand... on 2014-05-06 07:09 GMT

0 views
Skip to first unread message

hltd...@googlecode.com

unread,
May 6, 2014, 3:10:19 AM5/6/14
to hltdi-...@googlegroups.com
Revision: 87750d5c5122
Branch: default
Author: Michael Gasser <gas...@cs.indiana.edu>
Date: Tue May 6 07:09:41 2014 UTC
Log: l3lite changed to hiiktuu. Agreement variables and constraints to
handle SL agreement between words in a group; TL agreement still to come.
http://code.google.com/p/hltdi-l3/source/detail?r=87750d5c5122

Added:
/hiiktuu/__init__.py
/hiiktuu/constraint.py
/hiiktuu/cs.py
/hiiktuu/entry.py
/hiiktuu/features.py
/hiiktuu/language.py
/hiiktuu/languages/amh.lg
/hiiktuu/languages/eng.lg
/hiiktuu/languages/orm.lg
/hiiktuu/languages/spa.lg
/hiiktuu/sentence.py
/hiiktuu/ui.py
/hiiktuu/variable.py
Deleted:
/l3lite/__init__.py
/l3lite/constraint.py
/l3lite/cs.py
/l3lite/entry.py
/l3lite/features.py
/l3lite/language.py
/l3lite/languages/amh.lg
/l3lite/languages/eng.lg
/l3lite/languages/orm.lg
/l3lite/languages/spa.lg
/l3lite/sentence.py
/l3lite/ui.py
/l3lite/variable.py
Modified:
/hiiktuu.py

=======================================
--- /dev/null
+++ /hiiktuu/__init__.py Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,5 @@
+"""Hiiktuu: do-it-yourself L3. Create simple bilingual lexicons and
grammars for language pairs."""
+
+__all__ =
['language', 'entry', 'ui', 'constraint', 'variable', 'sentence', 'features', 'cs']
+
+from .sentence import *
=======================================
--- /dev/null
+++ /hiiktuu/constraint.py Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,2947 @@
+#
+# Hiiktuu constraints.
+#
+########################################################################
+#
+# This file is part of the HLTDI L^3 project
+# for parsing, generation, translation, and computer-assisted
+# human translation.
+#
+# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
+#
+# This program is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =========================================================================
+
+# 2014.03.27
+# -- Created. Initially just copied from l3xdg/constraint.py.
+# 2014.03.29
+# -- Fixed cant_precede() so it works with IVars (determined and not).
+# 2014.04.03
+# -- Created ComplexSetConvexity
+# 2014.04.05
+# -- Created ComplexUnionSelection
+# 2014.04.15
+# -- Constraint types used so far:
+# UnionSelection, PrecedenceSelection, ComplexUnionSelection,
+# ComplexSetConvexity, Union, Disjoint, Inclusion
+# 2014.04.26
+# -- Fixed several bugs in SetPrecedence (needed for TL sequencing).
+# 2014.04.30
+# -- Eliminated lots of unused constraints.
+# Fixed complex constraints so that sub-constraints are not recorded
+# in their variables.
+# 2014.05.04-5
+# -- AgrSelection constraint.
+# 2014.05.05
+# -- ComplexAgrSelection constraint.
+# Generalization of three complex constraints to ComplexConstraint
class.
+
+from .variable import *
+# This is imported in another branch too...
+from .features import *
+import itertools
+
class Constraint:
    """Abstract base class for all Hiiktuu constraints.

    A constraint holds a sequence of variables and, unless record=False,
    registers itself on each variable so the solver can wake it up when
    a variable changes.  Subclasses must implement fails(),
    is_entailed(), and infer().
    """

    # Constants for the outcome of running a constraint.
    failed = 0
    entailed = 1
    sleeping = 2

    # Weight threshold below which a constraint counts as lenient.
    lenience = .5

    def __init__(self, variables, problem=None, record=True, weight=1):
        self.variables = variables
        self.problem = problem
        self.weight = weight
        if record:
            for var in variables:
                # DetVars never change, so there is no point in being
                # woken up by them.
                if isinstance(var, DetVar):
                    continue
                var.constraints.append(self)
        self.name = ''

    def __repr__(self):
        return self.name

    def is_lenient(self):
        """Is this constraint weak enough to be treated leniently?"""
        return self.weight < Constraint.lenience

    def set_weight(self, weight):
        self.weight = weight

    def get_var(self):
        """The single variable for this constraint."""
        return self.variables[0]

    # Each Constraint type must implement fails(), is_entailed(), and
    # infer().

    def fails(self, dstore=None):
        raise NotImplementedError(
            "{} is an abstract class".format(self.__class__.__name__))

    def is_entailed(self, dstore=None):
        raise NotImplementedError(
            "{} is an abstract class".format(self.__class__.__name__))

    def infer(self, dstore=None, verbosity=0, tracevar=()):
        """Should return state and variables that change."""
        raise NotImplementedError(
            "{} is an abstract class".format(self.__class__.__name__))

    def determine(self, dstore=None, verbosity=0, tracevar=()):
        """Try to determine each variable, returning the set if any determined.

        tracevar defaults to an empty tuple rather than None: the
        membership test below would raise TypeError on None whenever
        verbosity is non-zero.
        """
        det = set()
        for variable in self.variables:
            if not variable.is_determined(dstore=dstore) and \
               variable.determined(dstore=dstore, constraint=self,
                                   verbosity=verbosity) is not False:
                if verbosity and variable in tracevar:
                    print(' {} determining {} at {}'.format(
                        self, variable, variable.get_value(dstore=dstore)))
                det.add(variable)
        return det

    def run(self, dstore=None, verbosity=0, tracevar=()):
        """Run this constraint during constraint satisfaction.

        Returns a (state, variables) pair, where state is one of
        Constraint.failed / entailed / sleeping and variables is the set
        of variables that were determined or changed.
        """
        if verbosity > 1:
            print(' Running {}'.format(self))
        # Try to determine the variables; if any are determined, go to
        # sleep and return the set of newly determined variables.
        determined = self.determine(dstore=dstore, verbosity=verbosity,
                                    tracevar=tracevar)
        if determined:
            if verbosity > 1:
                print(' Determined variables', determined)
            return Constraint.sleeping, determined
        # Otherwise see if the constraint fails. If it does, fail and
        # return the empty set.
        if self.fails(dstore=dstore):
            if verbosity > 1:
                print(' Failed!')
            elif verbosity:
                print('{} failed; weight: {}'.format(self, self.weight))
            return Constraint.failed, set()
        # Otherwise see if the constraint is entailed. If it is, succeed
        # and return the empty set.
        if self.is_entailed(dstore=dstore):
            if verbosity > 1:
                print(' Entailed')
            return Constraint.entailed, set()
        # Otherwise try inferring variable values. Either succeed or
        # sleep and return any changed variables.
        return self.infer(dstore=dstore, verbosity=verbosity,
                          tracevar=tracevar)

    @staticmethod
    def string_set(s):
        """Convenient print name for a set."""
        if len(s) > 10:
            return '{{{0}...{1}}}'.format(min(s), max(s))
        else:
            return '{}'.format(set.__repr__(s))

    def print_vars(self):
        '''Print out components of constraint variables.'''
        for v in self.variables:
            print('{} :: {}'.format(v, v.dstores))
+
+## Primitive basic constraints
+
+# Integer domains
+
class Member(Constraint):
    """IVar is constrained against a fixed integer domain."""

    def __init__(self, var, domain, problem=None, record=True):
        """
        var: an IVar
        domain: a set of ints
        """
        Constraint.__init__(self, (var,), problem=problem, record=record)
        self.domain = domain
        self.name = '{0}<{1}'.format(self.get_var(),
                                     Constraint.string_set(self.domain))

    def fails(self, dstore=None):
        """Fail if the constraint's domain is not a subset of the
        variable's current domain."""
        if not self.domain.issubset(self.get_var().get_domain(dstore=dstore)):
            return True
        return False

    def is_entailed(self, dstore=None):
        """Entailed once the variable's domain is a subset of the
        constraint's domain."""
        if self.get_var().get_domain(dstore=dstore).issubset(self.domain):
            return True
        return False

    def infer(self, dstore=None, verbosity=0, tracevar=()):
        """Restrict the variable's values to the intersection of its
        current values and the constraint's domain.

        tracevar defaults to () (not None): the membership test below is
        evaluated whenever verbosity <= 1 and would crash on None.
        """
        var = self.get_var()
        if var.strengthen(self.domain, dstore=dstore,
                          constraint=(verbosity > 1 or var in tracevar) and self):
            return Constraint.entailed, {var}
        return Constraint.entailed, set()
+
+# Set domains
+
class Superset(Constraint):
    """Set variable is constrained to be a superset of subset."""

    def __init__(self, var, subset, problem=None, record=True):
        """
        var: a SVar
        subset: a set of ints
        """
        Constraint.__init__(self, (var,), problem=problem, record=record)
        self.subset = subset
        self.name = '{0} >= {1}'.format(self.get_var(),
                                        Constraint.string_set(self.subset))

    def fails(self, dstore=None):
        """Fail if the required subset is not within the variable's
        upper bound (so it can never be covered)."""
        if not self.subset.issubset(self.get_var().get_upper(dstore=dstore)):
            return True
        return False

    def is_entailed(self, dstore=None):
        """Entailed once the variable's lower bound is already a
        superset of the constraint's subset."""
        if self.get_var().get_lower(dstore=dstore).issuperset(self.subset):
            return True
        return False

    def infer(self, dstore=None, verbosity=0, tracevar=()):
        """Raise the variable's lower bound to include the required
        subset (union of the current lower bound and subset).

        tracevar defaults to () (not None): the membership test below is
        evaluated whenever verbosity <= 1 and would crash on None.
        """
        var = self.get_var()
        if var.strengthen_lower(self.subset, dstore=dstore,
                                constraint=(verbosity > 1 or var in tracevar) and self):
            return Constraint.entailed, {var}
        return Constraint.entailed, set()
+
class Subset(Constraint):
    """Set variable is constrained to be a subset of superset."""

    def __init__(self, var, superset, problem=None, record=True):
        """
        var: a SVar
        superset: a set of ints
        """
        Constraint.__init__(self, (var,), problem=problem, record=record)
        self.superset = superset
        self.name = '{0} c= {1}'.format(self.get_var(),
                                        Constraint.string_set(self.superset))

    def fails(self, dstore=None):
        """Fail if the variable's lower bound is not a subset of the
        constraint superset (some required element can never fit)."""
        if not self.get_var().get_lower(dstore=dstore).issubset(self.superset):
            return True
        return False

    def is_entailed(self, dstore=None):
        """Entailed once the variable's upper bound is already a subset
        of the constraint's superset."""
        if self.get_var().get_upper(dstore=dstore).issubset(self.superset):
            return True
        return False

    def infer(self, dstore=None, verbosity=0, tracevar=()):
        """Restrict the variable to the intersection of its current
        upper bound and the superset.

        tracevar defaults to () (not None): the membership test below is
        evaluated whenever verbosity <= 1 and would crash on None.
        """
        var = self.get_var()
        if var.strengthen_upper(self.superset, dstore=dstore,
                                constraint=(verbosity > 1 or var in tracevar) and self):
            return Constraint.entailed, {var}
        return Constraint.entailed, set()
+
+### Set cardinality
+##
+##class CardinalityGEQ(Constraint):
+## """Set variable's cardinality is constrained to be >= lower bound."""
+##
+## def __init__(self, var, lower, problem=None):
+## Constraint.__init__(self, (var,), problem=problem)
+## self.lower = lower
+## self.name = '|{0}|>={1}'.format(self.get_var(), self.lower)
+##
+## def fails(self, dstore=None):
+## """Is the var's upper cardinality bound < lower?"""
+## if self.get_var().get_upper_card(dstore=dstore) < self.lower:
+## return True
+## return False
+##
+## def is_entailed(self, dstore=None):
+## """Is the variable's lower cardinality bound already >= lower?"""
+## if self.get_var().get_lower_card(dstore=dstore) >= self.lower:
+## return True
+## return False
+##
+## def infer(self, dstore=None, verbosity=0, tracevar=None):
+## """The variable's cardinality is restricted be >= lower: lower
bound
+## is raised if necessary."""
+## var = self.get_var()
+## if var.strengthen_lower_card(self.lower, dstore=dstore,
+## constraint=(verbosity>1 or var in
tracevar) and self):
+## return Constraint.entailed, {var}
+## return Constraint.entailed, set()
+##
+##class CardinalityLEQ(Constraint):
+## """Set variable's cardinality is constrained to be <= upper bound."""
+##
+## def __init__(self, var, upper, problem=None):
+## Constraint.__init__(self, (var,), problem=problem)
+## self.upper = upper
+## self.name = '|{0}| c= {1}'.format(self.get_var(), self.upper)
+##
+## def fails(self, dstore=None):
+## """Is the var's lower cardinality bound > upper?"""
+## if self.get_var().get_lower_card(dstore=dstore) > self.upper:
+## return True
+## return False
+##
+## def is_entailed(self, dstore=None):
+## """Is the variable's upper cardinality bound already <= upper?"""
+## if self.get_var().get_upper_card(dstore=dstore) <= self.upper:
+## return True
+## return False
+##
+## def infer(self, dstore=None, verbosity=0, tracevar=None):
+## """The variable's cardinality is restricted to be <= upper:
+## upper bound is lowered if necessary."""
+## var = self.get_var()
+## if var.strengthen_upper_card(self.upper, dstore=dstore,
+## constraint=(verbosity>1 or var in
tracevar) and self):
+## return Constraint.entailed, {var}
+## return Constraint.entailed, set()
+##
+##### Constraints that propagate
+##
+#### Primitive propagators
+##
+### Integer domain variables only
+##
+##class LessThan(Constraint):
+## """IVar1 is less than or equal to IVar2."""
+##
+## def __init__(self, variables, problem=None, weight=1):
+## Constraint.__init__(self, variables, problem=problem,
+## weight=weight)
+## self.name = '{0} <= {1}'.format(self.get_iv1(), self.get_iv2())
+##
+## def get_iv1(self):
+## return self.variables[0]
+##
+## def get_iv2(self):
+## return self.variables[1]
+##
+## def fails(self, dstore=None):
+## """
+## Fail if min of domain1 > max of domain2.
+## """
+## iv1 = self.get_iv1()
+## iv2 = self.get_iv2()
+## min1 = min(iv1.get_domain(dstore=dstore))
+## max2 = max(iv2.get_domain(dstore=dstore))
+## if min1 > max2:
+## return True
+## return False
+##
+## def is_entailed(self, dstore=None):
+## """Entailed if max of domain1 <= min of domain2."""
+## iv1 = self.get_iv1()
+## iv2 = self.get_iv2()
+## max1 = max(iv1.get_domain(dstore=dstore))
+## min2 = min(iv2.get_domain(dstore=dstore))
+## if max1 <= min2:
+## return True
+## return False
+##
+## def infer(self, dstore=None, verbosity=0, tracevar=None):
+## changed = set()
+## iv1 = self.get_iv1()
+## iv2 = self.get_iv2()
+## d1 = iv1.get_domain(dstore=dstore)
+## d2 = iv2.get_domain(dstore=dstore)
+## # iv2 must be between the min of iv1's domain and the maximum
value
+## iv2_values = set(range(min(d1), max(d2) + 1))
+## if iv2.strengthen(iv2_values, dstore=dstore,
+## constraint=(verbosity>1 or iv2 in tracevar)
and self):
+## changed.add(iv2)
+## # iv1 must be between the min of its domain and the max of iv2's
domain
+## # (iv2's domain may have changed)
+## iv1_values = set(range(min(d1),
max(iv2.get_domain(dstore=dstore)) + 1))
+## # Maximum value of sv2's upper bound constrains sv1's upper card
+## if iv1.strengthen(iv1_values, dstore=dstore,
+## constraint=(verbosity>1 or iv1 in tracevar)
and self):
+## changed.add(iv1)
+##
+## if verbosity > 1 and changed:
+## print(' Variables {} changed'.format(changed))
+## return Constraint.sleeping, changed
+##
+##class CardinalityEq(Constraint):
+## """Set variable's cardinality is constrained to be equal to value of
IVar."""
+##
+## def __init__(self, variables, problem=None, weight=1):
+## Constraint.__init__(self, variables, problem=problem,
+## weight=weight)
+## self.sv = variables[0]
+## self.iv = variables[1]
+## self.name = '|{0}| = {1}'.format(self.sv, self.iv)
+##
+## def fails(self, dstore=None):
+## """Is the sv's lower cardinality bound > max of iv's domain?"""
+## if self.iv.determined(dstore=dstore) is not False and
self.sv.determined(dstore=dstore) is not False:
+### print('Both vars determined: {}, {}'.format(self.iv,
self.sv))
+## if self.iv.get_value(dstore=dstore) !=
self.sv.get_upper_card(dstore=dstore):
+## return True
+## if self.sv.get_lower_card(dstore=dstore) >
max(self.iv.get_domain(dstore=dstore)):
+## return True
+## if min(self.iv.get_domain(dstore=dstore)) >
self.sv.get_upper_card(dstore=dstore):
+## return True
+## return False
+##
+## def is_entailed(self, dstore=None):
+## """Is the variable's upper cardinality bound already = iv?"""
+## if self.iv.determined(dstore=dstore) is not False and
self.sv.determined(dstore=dstore) is not False:
+## if self.sv.get_upper_card(dstore=dstore) ==
self.iv.get_value(dstore=dstore):
+## return True
+## return False
+##
+## def infer(self, dstore=None, verbosity=0, tracevar=None):
+## """sv's upper cardinality is restricted to be <= min of iv's
domain.
+## iv's domain is restricted to values >= lower cardinality of
sv."""
+## state = Constraint.sleeping
+## changed = set()
+## sv = self.sv
+## iv = self.iv
+## sv_low_card = sv.get_lower_card(dstore=dstore)
+## sv_up_card = sv.get_upper_card(dstore=dstore)
+## if iv.strengthen(set(range(sv_low_card, sv.max)), dstore=dstore,
+## constraint=(verbosity>1 or iv in tracevar) and
self):
+## changed.add(iv)
+## return state, changed
+## if iv.strengthen(set(range(0, sv_up_card + 1)), dstore=dstore,
+## constraint=(verbosity>1 or iv in tracevar) and
self):
+## changed.add(iv)
+## return state, changed
+## iv_dom = iv.get_domain(dstore=dstore)
+## if sv.strengthen_lower_card(min(iv_dom), dstore=dstore,
+## constraint=(verbosity>1 or sv in
tracevar) and self):
+## changed.add(sv)
+## return state, changed
+## if sv.strengthen_upper_card(max(iv_dom), dstore=dstore,
+## constraint=(verbosity>1 or sv in
tracevar) and self):
+## changed.add(sv)
+## return state, changed
+
+# Set domain variables only
+
class SetConvexity(Constraint):
    """There must not be any 'holes' in the (single) set variable, which
    represents the positions of the descendants of a node as well as
    that of the node itself."""

    def __init__(self, var, problem=None, weight=1, record=True):
        """Only one variable, so a special constructor."""
        Constraint.__init__(self, [var], problem=problem, weight=weight,
                            record=record)
        self.var = self.variables[0]
        self.name = '{0} <>'.format(self.var)

    def fails(self, dstore=None):
        """Four ways to fail."""
        # 1. If the variable is determined, its value can't have holes.
        if self.var.determined(dstore=dstore, constraint=self) is not False:
            val = self.var.get_value(dstore=dstore)
            if val:
                val_range = set(range(min(val), max(val)+1))
                if val_range - val:
                    return True
        lower_card = self.var.get_lower_card(dstore=dstore)
        lower = self.var.get_lower(dstore=dstore)
        upper = self.var.get_upper(dstore=dstore)
        if lower:
            # Necessary range: all values between the minimum and the
            # maximum (inclusive) of the lower bound.
            neces_range = set(range(min(lower), max(lower)+1))
            # 2. Some value in the necessary range is missing from the
            # upper bound.
            if neces_range - upper:
                return True
            # Possible values that are not in the necessary range.
            possible = upper - neces_range
            # 3. A gap separates the max of the necessary range from the
            # min of the possible values, and discarding the unreachable
            # possible values would leave too few values for the lower
            # cardinality.
            if possible and neces_range:
                min_poss = min(possible)
                max_neces = max(neces_range)
                if min_poss - max_neces > 1:
                    if len(upper) - len(possible) < lower_card:
                        return True
        # 4. The upper bound must contain a run of consecutive integers
        # at least as long as the lower cardinality.
        if lower_card <= 1:
            return False
        upper_ordered = sorted(upper)
        last = upper_ordered[0]
        count = 1
        for pos in upper_ordered[1:]:
            if count >= lower_card:
                return False
            if pos - last > 1:
                # Gap: restart the run.
                count = 1
            else:
                count += 1
            last = pos
        if count >= lower_card:
            return False
        return True

    def is_entailed(self, dstore=None):
        """Entailed if the variable is determined, or if the lower bound
        is convex and the upper bound only adds a single value below or
        above the lower bound."""
        if self.var.determined(dstore=dstore, constraint=self) is not False:
            return True
        lower = self.var.get_lower(dstore=dstore)
        upper = self.var.get_upper(dstore=dstore)
        if not lower:
            return False
        min_lower = min(lower)
        max_lower = max(lower)
        if not set(range(min_lower, max_lower+1)) - lower:
            if min_lower - min(upper) <= 1 and max(upper) - max_lower <= 1:
                return True
        return False

    def infer(self, dstore=None, verbosity=0, tracevar=()):
        """If the variable's lower bound is non-empty, every value
        between its min and max must be in the variable, and there can't
        be any gaps in the upper bound either.

        tracevar defaults to an immutable () instead of a shared mutable
        [] (mutable-default pitfall).
        """
        changed = set()
        v = self.var
        lower = v.get_lower(dstore=dstore)
        if len(lower) > 0:
            upper = v.get_upper(dstore=dstore)
            min_low = min(lower)
            max_low = max(lower)
            # Make the lower bound everything between the min and max.
            if v.strengthen_lower(set(range(min_low, max_low+1)),
                                  dstore=dstore,
                                  constraint=(verbosity > 1 or v in tracevar) and self):
                changed.add(v)
                return Constraint.sleeping, changed

            # Look for gaps in the upper bound.
            # Starting at the max of the lower bound...
            max_up = max(upper)
            x = max_low + 1
            while x in upper and x < max_up:
                x += 1
            if x < max_up:
                # Everything beyond the gap is unreachable.
                if v.discard_upper(set(range(x, max_up+1)),
                                   dstore=dstore,
                                   constraint=(verbosity > 1 or v in tracevar) and self):
                    changed.add(v)
                    return Constraint.sleeping, changed
            # Starting at the min of the lower bound...
            min_up = min(upper)
            x = min_low - 1
            while x in upper and x > min_up:
                x -= 1
            if x > min_up + 1:
                if v.discard_upper(set(range(min_up, x)),
                                   dstore=dstore,
                                   constraint=(verbosity > 1 or v in tracevar) and self):
                    changed.add(v)
                    return Constraint.sleeping, changed

        return Constraint.sleeping, changed
+
class SupersetIntersection(Constraint):
    """Set var S1 is superset of intersection of set vars S2 and S3."""

    def __init__(self, variables, problem=None, weight=1, record=True):
        Constraint.__init__(self, variables, problem=problem,
                            weight=weight, record=record)
        self.name = '{0} >= {1} ^ {2}'.format(self.variables[0],
                                              self.variables[1],
                                              self.variables[2])

    def fails(self, dstore=None):
        """Fail if the intersection of the lower bounds of S2 and S3 is
        not a subset of the upper bound of S1, or is bigger than S1 can
        ever be."""
        s1 = self.variables[0]
        s2 = self.variables[1]
        s3 = self.variables[2]
        s2_inters_s3 = s2.get_lower(dstore=dstore) & s3.get_lower(dstore=dstore)
        if not s2_inters_s3 <= s1.get_upper(dstore=dstore):
            return True
        # Fail on cardinalities
        if s1.get_upper_card(dstore=dstore) < len(s2_inters_s3):
            return True
        return False

    def is_entailed(self, dstore=None):
        """Entailed once the intersection of the *upper* bounds of S2
        and S3 is already a subset of the lower bound of S1."""
        s1 = self.variables[0]
        s2 = self.variables[1]
        s3 = self.variables[2]
        if s2.get_upper(dstore=dstore) & s3.get_upper(dstore=dstore) <= s1.get_lower(dstore=dstore):
            return True
        return False

    def infer(self, dstore=None, verbosity=0, tracevar=()):
        """Propagate bounds and cardinalities.

        tracevar defaults to an immutable () instead of a shared mutable
        [] (mutable-default pitfall).
        """
        changed = set()
        s1 = self.variables[0]
        s2 = self.variables[1]
        s3 = self.variables[2]
        # Intersection of lower bounds of S2 and S3 is a subset of the
        # lower bound of S1.
        if s1.strengthen_lower(s2.get_lower(dstore=dstore) & s3.get_lower(dstore=dstore),
                               dstore=dstore,
                               constraint=(verbosity > 1 or s1 in tracevar) and self):
            changed.add(s1)
        # Upper bounds of S2 and S3 exclude elements which are in the
        # lower bounds of S3 and S2, respectively, but not in the upper
        # bound of S1.
        s1_up = s1.get_upper(dstore=dstore)
        s2_not_s1 = s2.get_lower(dstore=dstore) - s1_up
        s3_not_s1 = s3.get_lower(dstore=dstore) - s1_up
        for x in s3.get_upper(dstore=dstore).copy():
            if x in s2_not_s1:
                if s3.discard_upper(x, dstore=dstore,
                                    constraint=(verbosity > 1 or s3 in tracevar) and self):
                    changed.add(s3)
        for x in s2.get_upper(dstore=dstore).copy():
            if x in s3_not_s1:
                if s2.discard_upper(x, dstore=dstore,
                                    constraint=(verbosity > 1 or s2 in tracevar) and self):
                    changed.add(s2)
        # Inference based on cardinalities (from Müller, p. 104)
        s2Us3_card = len(s2.get_upper(dstore=dstore) | s3.get_upper(dstore=dstore))
        s1_up_card = s1.get_upper_card(dstore=dstore)
        s2_low_card = s2.get_lower_card(dstore=dstore)
        s3_low_card = s3.get_lower_card(dstore=dstore)
        if s1.strengthen_lower_card(s2_low_card + s3_low_card - s2Us3_card,
                                    dstore=dstore,
                                    constraint=(verbosity > 1 or s1 in tracevar) and self):
            changed.add(s1)
        if s2.strengthen_upper_card(s2Us3_card + s1_up_card - s3_low_card,
                                    dstore=dstore,
                                    constraint=(verbosity > 1 or s2 in tracevar) and self):
            changed.add(s2)
        if s3.strengthen_upper_card(s2Us3_card + s1_up_card - s2_low_card,
                                    dstore=dstore,
                                    constraint=(verbosity > 1 or s3 in tracevar) and self):
            changed.add(s3)
        if verbosity > 1 and changed:
            print(' Variables {} changed'.format(changed))
        return Constraint.sleeping, changed
+
class SubsetUnion(Constraint):
    """Set var S1 is subset of union of set vars S2 and S3."""

    def __init__(self, variables, problem=None, propagate=True,
                 weight=1, record=True):
        # NOTE(review): 'propagate' is accepted but never used; kept for
        # interface compatibility with existing callers.
        Constraint.__init__(self, variables, problem=problem,
                            weight=weight, record=record)
        self.name = '{0} c= {1} U {2}'.format(self.variables[0],
                                              self.variables[1],
                                              self.variables[2])

    def fails(self, dstore=None):
        """Fail if the union of the upper bounds of S2 and S3 (the
        biggest the union can be) is not a superset of S1's lower bound,
        or is smaller than S1's minimum cardinality."""
        s1 = self.variables[0]
        s2 = self.variables[1]
        s3 = self.variables[2]
        s2_union_s3 = s2.get_upper(dstore=dstore) | s3.get_upper(dstore=dstore)
        if not s2_union_s3 >= s1.get_lower(dstore=dstore):
            return True
        # Fail on cardinalities
        if s1.get_lower_card(dstore=dstore) > len(s2_union_s3):
            return True
        return False

    def is_entailed(self, dstore=None):
        """Entailed once the union of the lower bounds of S2 and S3
        already covers the upper bound of S1."""
        s1 = self.variables[0]
        s2 = self.variables[1]
        s3 = self.variables[2]
        if s2.get_lower(dstore=dstore) | s3.get_lower(dstore=dstore) >= s1.get_upper(dstore=dstore):
            return True
        return False

    def infer(self, dstore=None, verbosity=0, tracevar=()):
        """Propagate bounds and cardinalities.

        tracevar defaults to an immutable () instead of a shared mutable
        [] (mutable-default pitfall).
        """
        changed = set()
        s1 = self.variables[0]
        s2 = self.variables[1]
        s3 = self.variables[2]
        # S1 must be a subset of the union of the upper bounds of S2 and S3.
        if s1.strengthen_upper(s2.get_upper(dstore=dstore) | s3.get_upper(dstore=dstore),
                               dstore=dstore,
                               constraint=(verbosity > 1 or s1 in tracevar) and self):
            changed.add(s1)
        # S2's and S3's lower bounds must contain elements that are in
        # the lower bound of S1 but not in S3 and S2, respectively
        # (note: Müller has *lower* bounds of S3 and S2 (Eq. 11.17,
        # p. 105), but this seems too strong).
        s1_not_s2 = s1.get_lower(dstore=dstore) - s2.get_upper(dstore=dstore)
        s1_not_s3 = s1.get_lower(dstore=dstore) - s3.get_upper(dstore=dstore)
        if s3.strengthen_lower(s1_not_s2, dstore=dstore,
                               constraint=(verbosity > 1 or s3 in tracevar) and self):
            changed.add(s3)
        if s2.strengthen_lower(s1_not_s3, dstore=dstore,
                               constraint=(verbosity > 1 or s2 in tracevar) and self):
            changed.add(s2)
        # Inference based on cardinalities (from Müller, p. 105, but
        # there's apparently a typo; in Eq. 11.19, n1 should be the
        # upper, not the lower bound of S1).
        if s1.strengthen_upper_card(s2.get_upper_card(dstore=dstore) + s3.get_upper_card(dstore=dstore),
                                    dstore=dstore,
                                    constraint=(verbosity > 1 or s1 in tracevar) and self):
            changed.add(s1)
        if s2.strengthen_lower_card(s1.get_lower_card(dstore=dstore) - s3.get_lower_card(dstore=dstore),
                                    dstore=dstore,
                                    constraint=(verbosity > 1 or s2 in tracevar) and self):
            changed.add(s2)
        if s3.strengthen_lower_card(s1.get_lower_card(dstore=dstore) - s2.get_lower_card(dstore=dstore),
                                    dstore=dstore,
                                    constraint=(verbosity > 1 or s3 in tracevar) and self):
            changed.add(s3)
        if verbosity > 1 and changed:
            print(' Variables {} changed'.format(changed))
        return Constraint.sleeping, changed
+
+##class CardinalitySubset(Constraint):
+## """Cardinality of set variable 1 is within set variable 2. This
constraint is not included
+## in Müller, but it is needed for XDG valency.
+## It could be handled with IVMemberSV."""
+##
+## def __init__(self, variables, problem=None, weight=1):
+## Constraint.__init__(self, variables, problem=problem,
+## weight=weight)
+## self.name = '|{0}| c= {1}'.format(self.get_sv1(), self.get_sv2())
+##
+## def get_sv1(self):
+## return self.variables[0]
+##
+## def get_sv2(self):
+## return self.variables[1]
+##
+## def fails(self, dstore=None):
+## """Fail if minimum cardinality of SV1 is greater than maximum
possible value of SV2
+## or if maximum cardinality of SV1 is less than the minimum
possible value of SV2.
+## Fixed 2011.12.09: minimum possible value of SV2 is minimum of
*upper* bound, not
+## lower bound."""
+## sv1 = self.get_sv1()
+## sv2 = self.get_sv2()
+## upper2 = sv2.get_upper(dstore=dstore)
+## max2card = max(upper2) if upper2 else 0
+## if sv1.get_lower_card(dstore=dstore) > max2card:
+## return True
+### lower2 = sv2.get_lower(dstore=dstore)
+## min2card = min(upper2) if upper2 else 0
+## # min(lower2) if lower2 else 0
+## if sv1.get_upper_card(dstore=dstore) < min2card:
+## return True
+## return False
+##
+## def is_entailed(self, dstore=None):
+## """Entailed if cardinality of SV1 determined, SV2 determined,
and the former is in the latter."""
+## sv1 = self.get_sv1()
+## sv2 = self.get_sv2()
+## if sv2.determined(dstore=dstore, constraint=self) is not False
and \
+## sv1.get_lower_card(dstore=dstore) ==
sv1.get_upper_card(dstore=dstore) in sv2.get_value(dstore=dstore):
+## return True
+## return False
+##
+## def infer(self, dstore=None, verbosity=0, tracevar=None):
+## changed = set()
+## state = Constraint.sleeping
+## sv1 = self.get_sv1()
+## sv2 = self.get_sv2()
+## sv1_low_card = sv1.get_lower_card(dstore=dstore)
+## sv1_up_card = sv1.get_upper_card(dstore=dstore)
+### if tracevar in self.variables:
+### print(self, 'INFERRING')
+## # If sv1's cardinality is determined, then it must be in sv2
+## if sv1_low_card == sv1_up_card:
+### print('SV1 {} has same upper and lower card {}'.format(sv1,
sv1_low_card))
+## if sv2.strengthen_lower({sv1_low_card}, dstore=dstore,
+## constraint=(verbosity>1 or sv2 in
tracevar) and self):
+### constraint=self):
+### if sv2.determine({sv1_low_card}, dstore=dstore,
+### constraint=(verbosity>1 or sv2 in
tracevar) and self):
+## changed.add(sv2)
+## return state, changed
+##
+### if tracevar in self.variables:
+### print(self, 'GOT TO 0')
+##
+## sv2_upper = sv2.get_upper(dstore=dstore)
+### sv2_lower = sv2.get_lower(dstore=dstore)
+##
+## # Minimum value of sv2 constrains sv1's lower card
+## # Fixed 2011.12.09: minimum value of sv2 is min of *upper*
bound, not lower
+## if sv2_upper:
+## # Could be empty set, in which case no strengthening is
possible
+## if sv1.strengthen_lower_card(min(sv2_upper), dstore=dstore,
+## constraint=(verbosity>1 or sv1
in tracevar) and self):
+## changed.add(sv1)
+## return state, changed
+##
+### if tracevar in self.variables:
+### print(self, 'GOT TO 1')
+## # Maximum value of sv2's upper bound constrains sv1's upper card
+## upcard = max(sv2_upper) if sv2_upper else 0
+## if sv1.strengthen_upper_card(upcard, dstore=dstore,
constraint=(verbosity>1 or sv1 in tracevar) and self):
+## changed.add(sv1)
+## return state, changed
+### if tracevar in self.variables:
+### print(self, 'GOT TO 2')
+##
+## if verbosity > 1 and changed:
+## print(' Variables {} changed'.format(changed))
+## return state, changed
+
class SetPrecedence(Constraint):
    """All elements of set variable 1 must precede all elements of set
    variable 2.

    variables[0] and variables[1] are set variables whose elements are
    integer positions; the constraint holds when every value in
    variables[0] is less than every value in variables[1].
    """

    def __init__(self, variables, problem=None, weight=1, record=True):
        Constraint.__init__(self, variables, problem=problem,
                            weight=weight, record=record)
        self.name = '{0} << {1}'.format(self.variables[0], self.variables[1])

    # Also used in PrecedenceSelection

    @staticmethod
    def must_precede(svar1, svar2, dstore=None):
        """Is the highest value that can occur in svar1 < the lowest value
        that can occur in svar2?

        NOTE: because of the chained 'and', this returns a falsy value
        (an empty bound) rather than False proper when either upper
        bound is empty.
        """
        v1_upper = svar1.get_upper(dstore=dstore)
        v2_upper = svar2.get_upper(dstore=dstore)
        return v1_upper and v2_upper and (max(v1_upper) < min(v2_upper))

    @staticmethod
    def cant_precede(var1, var2, dstore=None):
        """Is the highest value that must occur in var1 >= the lowest
        value that must occur in var2?

        Handles both IVars (which use their remaining domain via
        get_upper) and set variables (which use their lower bound).
        """
        # Lower end: a value var1 is committed to containing.
        if isinstance(var1, IVar):
            # NOTE(review): for an int variable this takes the *minimum*
            # of the remaining domain -- presumably the least value var1
            # could still take; confirm against IVar semantics.
            v1 = min(var1.get_upper(dstore=dstore))
        elif not var1.get_lower(dstore=dstore):
            # Nothing is guaranteed to be in var1 yet, so precedence
            # cannot be ruled out.
            return False
        else:
            v1 = max(var1.get_lower(dstore=dstore))
        # Upper end: a value var2 is committed to containing.
        if isinstance(var2, IVar):
            v2 = max(var2.get_upper(dstore=dstore))
        elif not var2.get_lower(dstore=dstore):
            return False
        else:
            v2 = min(var2.get_lower(dstore=dstore))
        return v1 >= v2

    def fails(self, dstore=None):
        """Fail if any of set1's lower bound > any of set2's lower bound."""
        return SetPrecedence.cant_precede(self.variables[0], self.variables[1],
                                          dstore=dstore)

    def is_entailed(self, dstore=None):
        """Entailed if everything that can be in set1 precedes anything
        that can be in set2."""
        return SetPrecedence.must_precede(self.variables[0], self.variables[1],
                                          dstore=dstore)

    def infer(self, dstore=None, verbosity=0, tracevar=[]):
        """Narrow the bounds of the two set variables so that all of v1
        precedes all of v2.

        Returns (state, set-of-changed-variables); returns as soon as one
        variable is strengthened, so at most one variable changes per call.
        """
        changed = set()
        state = Constraint.sleeping
        v1 = self.variables[0]
        v1_low = v1.get_lower(dstore=dstore)
        v2 = self.variables[1]
        v2_low = v2.get_lower(dstore=dstore)
        # If the lower bound on v1 is not empty, v2 must be a subset of
        # {min(MAX, max(v1 + 1)), ..., MAX}
        if v1_low:
            v2_up_new = range(min([v1.max, max(v1_low) + 1]), v2.max+1)
            if v2.strengthen_upper(v2_up_new, dstore=dstore,
                                   constraint=(verbosity>1 or v2 in tracevar) and self):
                changed.add(v2)
                return state, changed
        # If the lower bound on v2 is not empty, v1 must be a subset of
        # {0, ..., max(0, min(v2_low) - 1)}
        if v2_low:
            v1_up_new = range(0, max([0, min(v2_low) - 1]) + 1)
            if v1.strengthen_upper(v1_up_new, dstore=dstore,
                                   constraint=(verbosity>1 or v1 in tracevar) and self):
                changed.add(v1)
                return state, changed
        # Remove all elements from v1 >= highest possible element in v2
        v1_up = v1.get_upper(dstore=dstore)
        v2_up = v2.get_upper(dstore=dstore)
        # NOTE(review): this overwrites v2's max attribute with the current
        # upper-bound maximum, and max() raises ValueError if v2_up is
        # empty -- confirm both effects are intended.
        v2.max = max(v2_up)
        # filterfalse keeps the elements of v1's upper bound that are NOT
        # below v2.max, i.e. the ones that can never satisfy precedence.
        v1_over = set(itertools.filterfalse(lambda x: x < v2.max, v1_up))
        if v1_over:
            if v1.discard_upper(v1_over, dstore=dstore,
                                constraint=(verbosity>1 or v1 in tracevar) and self):
                changed.add(v1)
                return state, changed
        return state, changed
+
+### Integer domain and set domain variables
+##
+##class IVMemberSV(Constraint):
+## """Integer variable value must be member of set variable value."""
+##
+## def __init__(self, variables, problem=None, propagate=True,
+## weight=1):
+## Constraint.__init__(self, variables, problem=problem,
propagate=propagate,
+## weight=weight)
+## self.name = '{0} c {1}'.format(self.get_iv(), self.get_sv())
+##
+## def get_iv(self):
+## """The domain variable."""
+## return self.variables[0]
+##
+## def get_sv(self):
+## """The set variable."""
+## return self.variables[1]
+##
+## def fails(self, dstore=None):
+## """Fail if none of the IV values are in SV upper bound."""
+## iv = self.get_iv()
+## sv = self.get_sv()
+## iv_dom = iv.get_domain(dstore=dstore)
+## sv_up = sv.get_upper(dstore=dstore)
+## if len(iv_dom & sv_up) == 0:
+## return True
+## return False
+##
+## def is_entailed(self, dstore=None):
+## """Entailed if IV values are subset of SV lower bound."""
+## iv = self.get_iv()
+## sv = self.get_sv()
+## iv_dom = iv.get_domain(dstore=dstore)
+## sv_low = sv.get_lower(dstore=dstore)
+### if self.pattern:
+### # For patterns, the propagator is entailed if every element
in the domain of iv
+### # unifies with the lower bound of sv
+### if all([unify_fssets({tup}, sv_low) for tup in iv_dom]):
+### return True
+## if iv_dom <= sv_low:
+## return True
+## return False
+##
+## def infer(self, dstore=None, verbosity=0, tracevar=None):
+## changed = set()
+## iv = self.get_iv()
+## sv = self.get_sv()
+## # Constrain the values of IV to be within upper bound of SV
+## if iv.strengthen(sv.get_upper(dstore=dstore), dstore=dstore,
+## constraint=(verbosity>1 or iv in tracevar) and
self):
+## changed.add(iv)
+## # If IV is determined, constrain SV to include it
+## if iv.determined(dstore=dstore, verbosity=verbosity) is not
False:
+## if sv.strengthen_lower(iv.get_domain(dstore=dstore),
dstore=dstore,
+## constraint=(verbosity>1 or sv in
tracevar) and self):
+## changed.add(sv)
+## if verbosity > 1 and changed:
+## print(' Variables {} changed'.format(changed))
+## return Constraint.sleeping, changed
+
+# Selection constraint propagators
+
+class Selection(Constraint):
+ """Superclass for most selection constraints.
+
+ mainvar: set domain var or int domain var (set var for primitive
propagators)
+ seqvars: set domain vars, int domain vars, constant sets, or constant
ints
+ (set var for primitive propagators)
+ selvar: set domain var or int domain var (set var for primitive
propagators)
+ """
+
+ def __init__(self, mainvar=None, selvar=None, seqvars=None,
+ problem=None, weight=1, record=True):
+ Constraint.__init__(self, [mainvar, selvar] + seqvars,
problem=problem,
+ weight=weight, record=record)
+ self.selvar = selvar
+ self.mainvar = mainvar
+ self.seqvars = seqvars
+
+ def is_entailed(self, dstore=None):
+ """Entailed only if all vars are determined.
+ """
+ if self.mainvar.determined(dstore=dstore, constraint=self) is not
False \
+ and self.selvar.determined(dstore=dstore, constraint=self) is
not False \
+ and all([v.determined(dstore=dstore, constraint=self) is not
False for v in self.seqvars]):
+ return True
+ return False
+
+ def infer(self, dstore=None, verbosity=0, tracevar=None):
+ """Some rules are common to all Selection subclasses."""
+
+ changed = set()
+ state = Constraint.sleeping
+ seqvars = self.seqvars
+ selvar = self.selvar
+ mainvar = self.mainvar
+
+ # If there is only one seqvar, then the main var is constrained to
be that value
+ # and the selection var has to be {0} or 0
+ if len(seqvars) == 1:
+ # since there's only one seq var to select, the selection
variable has to
+ # be {0} or 0
+ if selvar.determine(0, dstore=dstore,
+ constraint=(verbosity>1 or selvar in
tracevar) and self):
+ changed.add(selvar)
+ seqvar = seqvars[0]
+ if seqvar.determined(dstore=dstore, verbosity=verbosity,
constraint=self) is not False:
+ if mainvar.determine(seqvar.get_value(dstore=dstore),
dstore=dstore,
+ constraint=(verbosity>1 or mainvar in
tracevar) and self):
+ changed.add(mainvar)
+ state = Constraint.entailed
+ else:
+ if
mainvar.strengthen_lower(seqvar.get_lower(dstore=dstore), dstore=dstore,
+ constraint=(verbosity>1 or
mainvar in tracevar) and self):
+ changed.add(mainvar)
+ if
mainvar.strengthen_upper(seqvar.get_upper(dstore=dstore), dstore=dstore,
+ constraint=(verbosity>1 or
mainvar in tracevar) and self):
+ changed.add(mainvar)
+## if mainvar.determined(dstore=dstore,
verbosity=verbosity) is not False:
+## state = Constraint.entailed
+ if changed:
+ if verbosity > 1:
+ print(' Variables {} changed'.format(changed))
+ return state, changed
+ # If all of the seqvars are equal to one another and determined
and the selection variable must
+ # be non-empty, then the main var can be determined (as long as
the seqvar value is in its domain)
+ if all([v.determined(dstore=dstore, verbosity=verbosity,
constraint=self) is not False for v in seqvars]) and \
+ selvar.get_lower_card(dstore=dstore) > 0 and
seqvars[0].all_equal(seqvars[1:], dstore=dstore):
+ seq0_val = seqvars[0].get_value(dstore=dstore)
+ if mainvar.determine(seq0_val, dstore=dstore,
constraint=(verbosity>1 or mainvar in tracevar) and self):
***The diff for this file has been truncated for email.***
=======================================
--- /dev/null
+++ /hiiktuu/cs.py Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,164 @@
+#
+# Hiiktuu CS: what is needed to implement l3 style constraint
satisfaction
+# using the lexicon/grammars created.
+#
+########################################################################
+#
+# This file is part of the HLTDI L^3 project
+# for parsing, generation, translation, and computer-assisted
+# human translation.
+#
+# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
+#
+# This program is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =========================================================================
+
+# 2014.04.26
+# -- Created
+
+from .constraint import *
+
class Solver:
    """A solver for a constraint satisfaction problem.

    Repeatedly runs the constraints against the domain store until the
    problem fails, all essential variables are determined, or a fixed
    point is reached (in which case search/distribution must take over).
    """

    # Class-level counter used to assign unique solver ids.
    id = 0

    # Status values.
    running = 0
    succeeded = 1
    failed = 2
    distributable = 3
    skipped = 4

    def __init__(self, constraints, dstore,
                 description='', verbosity=0):
        """Record the constraints and domain store to solve over.

        description: optional label used in the solver's printname.
        verbosity: default verbosity for progress reporting.
        """
        self.constraints = constraints
        self.dstore = dstore
        # Used in solver's printname
        self.description = description
        self.verbosity = verbosity
        # Constraints found to be entailed (never re-awakened).
        self.entailed = []
        # Constraints that have failed (instance attr shadows the class
        # status constant Solver.failed; refer to the constant via the class).
        self.failed = []
        self.status = Solver.running
        self.id = Solver.id
        Solver.id += 1

    def __repr__(self):
        return "Solver{} ({})".format(' ' + self.description if self.description else '', self.id)

    def exit(self, result):
        """Should the solver stop? True on failure or when a fixed point
        is reached; also updates self.status accordingly."""
        if result == Constraint.failed:
            # FIX: record the failure; previously status stayed 'running'
            # even though propagation had failed.
            self.status = Solver.failed
            return True
        return self.fixed_point(result, verbosity=self.verbosity)

    def fixed_point(self, awaken, verbosity=0):
        """Check for termination given the constraints still awake.

        Returns True (and sets status) if all essential variables are
        determined or no constraints remain to run; False to keep
        propagating.
        """
        if verbosity > 1:
            s = "# constraints to awaken: {}, # variables to determine: {}|{}"
            print(s.format(len(awaken), len(self.dstore.ess_undet),
                           len(self.dstore.undetermined)))
        if self.dstore.is_determined():
            # All essential variables are determined
            self.status = Solver.succeeded
            return True
        if not awaken:
            # No more constraints apply but variables remain undetermined;
            # we have to distribute (search).
            self.status = Solver.distributable
            return True
        # Keep propagating
        return False

    def run(self, verbosity=0, tracevar=None):
        """Run the constraints until the CS fails or a fixed point is reached.

        tracevar: optional list of variables to trace during propagation.
        """
        tracevar = tracevar if tracevar is not None else []
        if verbosity:
            s = "Running {} with {}|{} undetermined variables, {} constraints"
            print(s.format(self, len(self.dstore.ess_undet),
                           len(self.dstore.undetermined), len(self.constraints)))
        awaken = set(self.constraints)
        it = 0
        while not self.exit(awaken):
            if verbosity > 1:
                print("Running iteration {}".format(it))
            awaken = self.run_constraints(awaken, verbosity=verbosity, tracevar=tracevar)
            it += 1

    def run_constraints(self, constraints, verbosity=0, tracevar=None):
        """Run each constraint once against the domain store.

        Returns the set of constraints to awaken on the next iteration,
        or Constraint.failed if any constraint failed or a changed
        variable can no longer be determined.
        """
        tracevar = tracevar if tracevar is not None else []
        awaken = set()
        all_changed = set()
        for constraint in constraints:
            state, changed_vars = constraint.run(dstore=self.dstore,
                                                 verbosity=verbosity, tracevar=tracevar)
            all_changed.update(changed_vars)
            if state == Constraint.entailed:
                # Constraint is entailed; record it and make sure it is
                # not re-awakened.
                self.entailed.append(constraint)
                awaken.discard(constraint)
            if state == Constraint.failed:
                if verbosity:
                    print("FAILED {}".format(constraint))
                return Constraint.failed
            # If any changed variable cannot possibly be determined,
            # the whole CS fails.
            for var in changed_vars:
                try:
                    var.determined(dstore=self.dstore, verbosity=verbosity)
                except VarError:
                    if verbosity:
                        print("{} CAN'T BE DETERMINED, SO {} MUST FAIL".format(var, constraint))
                    return Constraint.failed
            # Awaken the constraints of every changed variable, unless
            # they are already known to be entailed or failed.
            for var in changed_vars:
                update_cons = {c for c in var.constraints
                               if c not in self.entailed and c not in self.failed}
                # FIX: membership test; 'var == tracevar' compared a
                # variable against the trace *list* and never fired.
                if var in tracevar and verbosity:
                    print('Adding {} constraints for changed variable {}'.format(len(update_cons), var))
                awaken.update(update_cons)
        if verbosity > 1:
            print('# changed vars {}'.format(len(all_changed)))
        return awaken
+
=======================================
--- /dev/null
+++ /hiiktuu/entry.py Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,781 @@
+#
+# Hiiktuu entries: words, grammatical morphemes, lexemes, lexical classes
+#
+########################################################################
+#
+# This file is part of the HLTDI L^3 project
+# for parsing, generation, translation, and computer-assisted
+# human translation.
+#
+# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
+#
+# This program is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =========================================================================
+
+# 2014.02.10
+# -- Created
+# Possible subclasses: Lex (word, lexeme, class), Gram
+# 2014.02.12
+# -- Inheritance (class to word/lexeme): completed except for government.
+# (But possible conflicts in order are not handled yet.)
+# 2014.02.15
+# -- Methods for making dicts from entries and entries from dict, used
+# in serialization.
+# 2014.02.16-18
+# -- Class for groups (multi-word expressions).
+# 2014.02.18
+# -- Cloning of Lex instances (for groups and L3 nodes).
+# 2014.03.18
+# -- Lots of changes and additions to groups.
+# 2014.03.24
+# -- words attribute in Group is a list of [word, feat_dict] pairs.
+# 2014.04.16
+# -- Created simpler Group (with no dependency types), renamed old Group
to MWE.
+# 2014.04.20
+# -- Matching of group and sentence nodes.
+# 2014.04.30
+# -- Eliminated everything but Groups.
+# 2014.05.01
+# -- Group/node matching fixed.
+
+import copy, itertools
+import yaml
+
+from .features import *
+
# Prefix characters marking lexeme and class (category) names.
LEXEME_PRE = '%'
CLASS_PRE = '$'

class Entry:
    """Superclass for Group and possibly other lexical classes.

    An Entry records a name, its language, a usage count, a unique id,
    and (optionally) translations into other languages.
    """

    # Class-level counter used to assign unique entry ids.
    ID = 1
    # Default dependency label.
    dflt_dep = 'dflt'

    def __init__(self, name, language, id=0, trans=None):
        """Initialize name and basic features: language, trans, count, id.

        If id is 0 (the default), a fresh id is drawn from Entry.ID.
        """
        self.name = name
        self.language = language
        # Language-keyed dict of translation dicts; see get_translations().
        self.trans = trans
        self.count = 1
        if id:
            self.id = id
        else:
            self.id = Entry.ID
            Entry.ID += 1

    def __repr__(self):
        """Print name and id."""
        return '<{}:{}>'.format(self.name, self.id)

    @staticmethod
    def is_cat(name):
        """Is this the name of a category (does it begin with CLASS_PRE)?"""
        # FIX: startswith() is safe for the empty string, where the
        # original name[0] raised IndexError.
        return name.startswith(CLASS_PRE)

    ## Serialization

    def to_dict(self):
        """Convert the entry to a dictionary to be serialized in a yaml file."""
        d = {}
        d['name'] = self.name
        d['count'] = self.count
        if self.trans:
            d['trans'] = self.trans
        d['id'] = self.id
        return d

    @staticmethod
    def from_dict(d, language):
        """Convert a dict (loaded from a yaml file) to an Entry object."""
        e = Entry(d.get('name'), language)
        e.count = d.get('count', 1)
        e.id = d.get('id', 1)
        e.trans = d.get('trans')
        return e

    def update_count(self, count=1):
        """Update count on the basis of data from somewhere."""
        self.count += count

    ### Translations (word, gram, lexeme, group entries)
    ###
    ### Translations are stored in a language-id-keyed dict.
    ### Values are dicts with target entry names as ids.
    ### Values are dicts with correspondence ('cor'), count ('cnt'), etc.
    ### as keys.

    def get_translations(self, language, create=True):
        """Get the translation dict for language in word/lexeme/gram entry.
        Create it if it doesn't exist and create is True."""
        if self.trans is None:
            self.trans = {}
        if language not in self.trans and create:
            self.trans[language] = {}
        return self.trans.get(language)

    def add_trans(self, language, trans, count=1):
        """Add translation to the translation dictionary for language,
        initializing its count."""
        transdict = self.get_translations(language, create=True)
        transdict[trans] = {'c': count}

    def update_trans(self, language, trans, count=1):
        """Update the count of translation.

        Raises EntryError if the translation is not already recorded."""
        transdict = self.get_translations(language)
        if trans not in transdict:
            s = "Attempting to update non-existent translation {} for {}"
            raise(EntryError(s.format(trans, self.name)))
        transdict[trans]['c'] += count
+
+##class Lex(Entry):
+##
+## cloneID = 1
+##
+## def __init__(self, name, language, cls=None, id=0, group=False):
+## """In addition to Entry features, initialize
+## depsin, depsout, order, agr, gov, grams, and (for word and
lexeme) class."""
+## Entry.__init__(self, name, language, id=id)
+## self.depsin = None
+## self.depsout = None
+## self.order = None
+## self.agr = None
+## self.gov = None
+## self.grams = None
+## self.cls = cls
+## self.cloneID = 0
+## # Whether entry is part of a group
+## self.group = group
+##
+## def __repr__(self):
+## """Print name and a unique identifier."""
+## return '<{}:{}{}>'.format(self.name, self.id, ';' +
str(self.cloneID) if self.cloneID else '')
+##
+## ## Cloning
+## ## Needed for groups, which consist of copies of lexes and
+## ## for L3 node entries
+##
+## def clone(self, group=True):
+## copied = Lex(self.name, self.language, cls=self.cls, id=self.id,
group=group)
+## copied.depsin = self.depsin
+## copied.depsout = self.depsout
+## copied.order = self.order
+## copied.agr = self.agr
+## copied.gov = self.gov
+## copied.grams = self.grams
+## copied.cloneID = Lex.cloneID
+## Lex.cloneID += 1
+## return copied
+##
+## ## Serialization
+##
+## def to_dict(self):
+## """Convert the lex to a dictionary to be serialized in a yaml
file."""
+## d = Entry.to_dict(self)
+## if self.depsin:
+## d['depsin'] = copy.deepcopy(self.depsin)
+## if self.depsout:
+## d['depsout'] = copy.deepcopy(self.depsout)
+## if self.order:
+## d['order'] = copy.deepcopy(self.order)
+## if self.agr:
+## d['agr'] = copy.deepcopy(self.agr)
+## if self.gov:
+## d['gov'] = copy.deepcopy(self.gov)
+## if self.grams:
+## d['grams'] = self.grams.copy()
+## if self.cls:
+## d['cls'] = self.cls
+## return d
+##
+## @staticmethod
+## def from_dict(d, language):
+## """Convert a dict (loaded from a yaml file) to a Lex object."""
+## l = Lex(d.get('name'), language, cls=d.get('cls'))
+## if d.get('depsin'):
+## l.depsin = d.get('depsin')
+## if d.get('depsout'):
+## l.depsout = d.get('depsout')
+## if d.get('order'):
+## l.order = d.get('order')
+## if d.get('agr'):
+## l.agr = d.get('agr')
+## if d.get('gov'):
+## l.gov = d.get('gov')
+## if d.get('grams'):
+## l.grams = d.get('grams')
+## return l
+##
+## ## Dependencies (word, lexeme, class entries)
+##
+## def get_depin(self, label, create=False):
+## """Get the dict of features of incoming dependencies with label,
creating
+## the dict if it's not there and create is True."""
+## if self.depsin is None:
+## self.depsin = {}
+## if create and label not in self.depsin:
+## self.depsin[label] = {}
+## self.language.record_label(label)
+## return self.depsin.get(label)
+##
+## def add_depin(self, label, feats):
+## """Assign feats (a dictionary) to features for incoming
dependencies with label,
+## or update the current features."""
+## d = self.get_depin(label, create=True)
+## d.update(feats)
+##
+## def get_depout(self, label, create=False):
+## """Get the dict of features of outgoing dependencies with label,
creating
+## the dict if it's not there and create is True."""
+## if self.depsout is None:
+## self.depsout = {}
+## if create and label not in self.depsout:
+## self.depsout[label] = {}
+## self.language.record_label(label)
+## return self.depsout.get(label)
+##
+## def add_depout(self, label, feats):
+## """Assign feats (a dictionary) to features for outgoing
dependencies with label,
+## or update the current features."""
+## d = self.get_depout(label, create=True)
+## d.update(feats)
+##
+## ## Dependency features
+## ## A dict with keys
+## ## 'min', 'max', 'dflt', 'maxdist'
+##
+## def set_deps_feat(self, featdict, key, value):
+## featdict[key] = value
+##
+## def get_deps_feat(self, featdict, key):
+## return featdict.get(key)
+##
+## ## Order constraints
+## ## A constraint is a tuple of dependency labels and '^' representing
the head
+##
+## def get_order(self, create=False):
+## """Get the set of order constraint tuples, creating the set if
it's not there
+## and create is True."""
+## if self.order is None and create:
+## self.order = []
+## return self.order
+##
+## def add_order(self, constraint):
+## """Add an order constraint tuple to the set of order
constraints."""
+## order_constraints = self.get_order(create=True)
+## order_constraints.append(constraint)
+## self.language.record_order(constraint)
+##
+## ## Grammatical features associated with words, classes, and lexemes
+##
+## def get_gram(self, feature, create=False):
+## """Get the possible values and their counts for grammatical
feature.
+## If this is a word, the value is a string; if a class or lexeme,
a dict
+## of values and counts."""
+## if self.grams is None:
+## self.grams = {}
+### if feature not in self.grams and create:
+### self.grams[feature] = {}
+## return self.grams.get(feature)
+##
+## def set_gram(self, feat, values):
+## """Set possible values and their counts for grammatical feature.
+## values is a dict of values and their counts."""
+## if self.grams is None:
+## self.grams = {}
+## if feat in self.grams:
+## s = "Entry for {} already has a constraint for feature {}"
+## raise(EntryError(s.format(self.name, feat)))
+## self.grams[feat] = values
+##
+## def update_gram_value(self, feat, value, count=1):
+## """Add count to the current count for feature value."""
+## gram = self.get_gram(feat, create=True)
+## if value in gram:
+## gram[value] += count
+## else:
+## gram[value] = count
+##
+## ## Agreement and government
+##
+## ## An agreement constraint requires a dependency label, a head
feature, and
+## ## and a dependent feature.
+##
+## def add_agr(self, deplabel, head_feat, dep_feat):
+## """Add an agreement constraint to the list of constraints in the
entry."""
+## if self.agr is None:
+## self.agr = []
+## self.agr.append([deplabel, head_feat, dep_feat])
+##
+## ## A government constraint requires a dependency label, a dependent
feature,
+## ## and a dependent value.
+##
+## def add_gov(self, deplabel, dep_feat, dep_value):
+## if self.gov is None:
+## self.gov = []
+## self.gov.append([deplabel, dep_feat, dep_value])
+##
+## ## Inheritance: copying features from classes to lexemes and words
+## ## at initialization
+##
+## def inherit(self):
+## if not self.cls:
+## return
+## cls = self.language.get_class(self.cls)
+## if not cls:
+## s = "Class {} for {} does not exist"
+## raise(EntryError(s.format(self.cls, self)))
+## self.inherit_deps(cls)
+## self.inherit_order(cls)
+## self.inherit_grams(cls)
+## self.inherit_agr(cls)
+## self.inherit_gov(cls)
+## # Also inherit translation?
+##
+## def inherit_deps(self, cls):
+## """Inherit dependency constraints (in and out) from class."""
+## # In
+## cls_depsin = cls.depsin
+## if cls_depsin:
+## if self.depsin is None:
+## self.depsin = {}
+## for label, cls_constraints in cls_depsin.items():
+## if label in self.depsin:
+## constraints = self.depsin[label]
+## for k, v in cls_constraints.items():
+## if k in constraints:
+## continue
+## constraints[k] = v
+## else:
+## # Should this be a copy of cls_constraints?
+## self.depsin[label] = cls_constraints
+## # Out
+## cls_depsout = cls.depsout
+## if cls_depsout:
+## if self.depsout is None:
+## self.depsout = {}
+## for label, cls_constraints in cls_depsout.items():
+## if label in self.depsout:
+## constraints = self.depsout[label]
+## for k, v in cls_constraints.items():
+## if k in constraints:
+## continue
+## constraints[k] = v
+## else:
+## # Should this be a copy of cls_constraints?
+## self.depsout[label] = cls_constraints
+##
+## def inherit_order(self, cls):
+## """Inherit order constraints from class."""
+## cls_order = cls.order
+## if cls_order:
+## my_order = self.get_order(create=True)
+## # Just add all constraints (tuples) from the class to ones
+## # already there in the word or lexeme; what if there are
+## # conflicts?? (sort these out later)
+## for co in cls_order:
+## if co not in my_order:
+## my_order.append(co)
+##
+## def inherit_grams(self, cls):
+## """Inherit grammatical features from class."""
+## cls_grams = cls.grams
+## if cls_grams:
+## if self.grams is None:
+## self.grams = {}
+## for feature, value in cls_grams.items():
+## if feature in self.grams:
+## # word/lexeme gram has priority over class, so
+## # ignore this
+## continue
+## # copy any other feature/value constraint
+## # (should the value be a copy??)
+## self.grams[features] = value
+##
+## def inherit_agr(self, cls):
+## """Inherit agreement constraints from class."""
+## cls_agr = cls.agr
+## if cls_agr:
+## if self.agr is None:
+## self.agr = []
+## for constraint in cls_agr:
+## if constraint not in self.agr:
+## self.agr.append(constraint)
+##
+## def inherit_gov(self, cls):
+## """Inherit government constraints from class."""
+##
+##class MWE(Entry):
+## """Multi-word expressions. Each group consists of a head and a set
of nodes,
+## possibly connected to other nodes through explicit dependencies and
an explicit
+## order of the nodes.
+## Variable slots have dedicated names that allow them to be
+## referenced in translations.
+## MWEs must be created *after* other lexical items.
+## {index: [word_obj, {dep/position_feats}...}
+## """
+##
+## def __init__(self, name, language, head, head_feats=None,
head_order=None, head_lexeme=False):
+## """name of a MWE is something like acabar_de_V.
+## head is the word that is the syntactic head of the group."""
+## Entry.__init__(self, name, language)
+## # A list of [word feats] pairs; index in the list is the word's
(node's) ID
+## self.words = []
+## self.word_id = 0
+## if head_lexeme:
+## self.head_lexeme = True
+## head_type = language.get_lexeme(head)
+## else:
+## self.head_lexeme = False
+## head_type = language.get_words(head)
+## if not head_type:
+### print("No existing lexical entry in {} for head of group
{}".format(language, name))
+## # SHOULD THIS BE RECORDED IN THE WORD LEXICON?
+## self.head = language.add_word(head, group=True)
+## else:
+## # Use the first one if there's more than one
+## self.head = head_type[0].clone()
+## self.words.append([self.head, {}])
+### self.words[self.word_id] = [self.head, {}]
+## if head_order is not None:
+## self.words[word_id][1]['o'] = head_order
+### self.words[self.word_id][1]['o'] = head_order
+## self.word_id += 1
+##
+## def __repr__(self):
+## """Print name."""
+## return '<{}:{}>'.format(self.name, self.id)
+##
+## # Serialization
+##
+## def to_dict(self):
+## """Convert the group to a dictionary to be serialized in a yaml
file."""
+## d = Entry.to_dict(self)
+## d['head_lexeme'] = self.head_lexeme
+### d['words'] = {}
+## d['words'] = []
+## w = d['words']
+### for index, lex in self.words.items():
+## for lex in enumerate(self.words):
+## l = lex[0]
+## name = l.name
+## w.append([name])
+### w[index] = [name]
+## if len(lex) == 2:
+## w[-1].append(copy.deepcopy(lex[1]))
+### w[index].append(copy.deepcopy(lex[1]))
+## return d
+##
+## @staticmethod
+## def from_dict(d, language):
+## """Convert a dict (loaded from a yaml file) to a MWE object."""
+## lexeme = d['head_lexeme']
+## m = MWE(d.get('name'), language, d.get('words')[0][0],
head_lexeme=lexeme)
+### for id, word in d.get('words').items():
+## for id, word in enumerate(d.get('words')):
+## if id == 0:
+## # Just handle the dependencies for this case
+## deps = word[1]
+## m.words[0][1] = copy.deepcopy(deps)
+## else:
+## name = word[0]
+## lex = language.get_words(name)[0]
+## if len(word) == 2:
+## deps = word[1]
+## lex_info = [lex.clone(), copy.deepcopy(deps)]
+## else:
+## lex_info = [lex.clone()]
+## m.words.append(lex_info)
+## return m
+##
+## ## Getters
+##
+## def get_word(self, index):
+## """The lex and features for a word in the group with ID index."""
+## if index > len(self.words) - 1:
+## s = "No word in {} with internal ID {}"
+## raise(EntryError(s.format(self, index)))
+## return self.words[index]
+##
+## def get_word_feats(self, index):
+## word = self.get_word(index)
+## return word[1]
+##
+## def get_lex(self, id):
+## """Return the Lex with the given index."""
+## word = self.get_word(id)
+## return word[0]
+##
+## def get_daughters(self, word_id, dep=None):
+## """Return the indices of the daughters of word with id word_id
+## of type dep or all daughters if dep is None."""
+## feats = self.get_word_feats(word_id)
+## if 'd' not in feats:
+## return
+## daughters = feats['d']
+## if dep is not None:
+## return daughters.get(dep)
+## else:
+## # Maybe leave as an iterable object?
+## return
list(itertools.chain.from_iterable(daughters.values()))
+##
+## def get_mother(self, word_id):
+## """Return the type and index of the internal mother of word with
id word_id.
+## If this is the head, return None."""
+## feats = self.get_word_feats(word_id)
+## if 'm' not in feats:
+## return
+## return feats['m']
+##
+## def add_word(self, word, head_id=None, dependency=Entry.dflt_dep,
order=None):
+## """Add a word to the group, as dependent on dependency from
head."""
+## # For now, use first word entry
+## typ = self.language.get_words(word)
+## if not typ:
+### print("No existing lexical entry in {} for head of group
{}".format(self.language, word))
+## # SHOULD THIS BE RECORDED IN THE WORD LEXICON?
+## word = self.language.add_word(word, group=True)
+## else:
+## # Pick the first lexical entry for now
+## word = typ[0].clone()
+## self.words.append([word, {}])
+### self.words[self.word_id] = [word, {}]
+## if head_id is not None:
+## self.add_dep(head_id, self.word_id, dep=dependency)
+## if order is not None:
+## self.words[self.word_id][1]['o'] = order
+## id = self.word_id
+## self.word_id += 1
+## return id
+##
+## def add_dep(self, src, dest, dep=Entry.dflt_dep):
+## """Make a dependency of type dep from word with id src to word
with id dest."""
+## if src >= len(self.words):
+## s = "No word in {} with internal ID {}"
+## raise(EntryError(s.format(self, src)))
+## if dest >= len(self.words):
+## s = "No word in {} with internal ID {}"
+## raise(EntryError(s.format(self, dest)))
+## daughter_dict = self.get_word_feats(dest)
+## if 'm' in daughter_dict:
+## s = "Word {} in {} already has a mother"
+## raise(EntryError(s.format(dest, self)))
+## daughter_dict['m'] = (dep, src)
+## mother_dict = self.get_word_feats(src)
+## if 'd' not in mother_dict:
+## mother_dict['d'] = {}
+## mother_daughters = mother_dict['d']
+## if dep not in mother_daughters:
+## mother_daughters[dep] = []
+## mother_daughters[dep].append(dest)
+##
+## ## Translations
+## ## A translation of a group is a group in another language, with a
mapping or alignment
+## ## between the nodes (words) in the two groups.
+## ## The mapping takes the form of a list of target word indices or
None if the corresponding
+## ## word is unspecified, or -1 if there is no corresponding word
(deletion). If there are
+## ## more words/nodes in the target than in the source group, the
length of the list
+## ## is the number of target nodes.
+##
+## def add_trans(self, language, trans, count=1):
+## """Add translation to the translation dictionary for language,
+## initializing its count."""
+## Entry.add_trans(self, language, trans, count=count)
+## transdict = self.get_trans(language, trans)
+## transdict['m'] = [None for x in range(len(self.words))]
+##
+## def get_trans(self, language, trans, create=True):
+## alltrans = self.get_translations(language, create=create)
+## if not alltrans or trans not in alltrans:
+## s = "Attempting to update non-existent translation {} for {}"
+## raise(EntryError(s.format(trans, self.name)))
+## return alltrans[trans]
+##
+## def get_trans_map(self, language, trans):
+## """Get the mapping to nodes in translation."""
+## tdict = self.get_trans(language, trans)
+## return tdict.get('m')
+##
+## def get_trans_map1(self, language, trans, src_index):
+## """Get the mapped index of src_index in translation trans."""
+## map = self.get_trans_map(language, trans)
+## if not map:
+## s = "Attempting to access non-existing mapping for
translation {} of {}"
+## raise(EntryError(s.format(trans, self)))
+## return map[src_index]
+##
+## def add_trans_map(self, language, trans, src_id, trg_id):
+## """Add a correspondence between source and target nodes in a
translation mapping."""
+## tdict = self.get_trans(language, trans)
+### if 'm' not in tdict:
+### tdict['m'] = []
+### tdict['m'].append((src_id, trg_id))
+## tdict['m'][src_id] = trg_id
+##
+## def add_trans_del(self, language, trans, src_id):
+## """Record a node in the source group with nothing corresponding
to it in the target group."""
+## tdict = self.get_trans(language, trans)
+### if 'm' not in tdict:
+### tdict['m'] = []
+### tdict['m'].append((src_id, -1))
+## tdict['m'][src_id] = -1
+##
+## def add_trans_ins(self, language, trans, trg_id):
+## """Record a node in the target group with nothing corresponding
to it in the source group."""
+## tdict = self.get_trans(language, trans)
+### if 'm' not in tdict:
+### tdict['m'] = []
+## tdict['m'].append(trg_id)
+### tdict['m'].append((-1, trg_id))
+
class Group(Entry):
    """Primitive multi-word expression.

    Default is a head with unlabeled dependencies to all other tokens,
    and translations, including alignments, to one or more other
    languages."""

    def __init__(self, tokens, head_index=-1, head='', language=None, name='',
                 features=None, agr=None, trans=None):
        """Either head_index or head (a string) must be specified.

        tokens: list of token strings.
        head: the head token (string), or '' if head_index is used.
        head_index: index of the head within tokens (may be negative).
        features: None, or a list parallel to tokens of feature dicts
            (None/False for tokens that need none).
        agr: agreement constraints, each of the form
            (node_index1, node_index2, feature_pair, ...).
        """
        # name may be specified explicitly or derived from the tokens
        name = name or Group.make_name(tokens)
        Entry.__init__(self, name, language, trans=trans)
        self.tokens = tokens
        if head:
            self.head = head
            self.head_index = tokens.index(head)
        else:
            self.head = tokens[head_index]
            # BUG FIX: normalize a negative index (such as the default -1)
            # to its non-negative equivalent; otherwise the comparison with
            # enumerate() indices in match_nodes() could never succeed.
            self.head_index = head_index if head_index >= 0 else len(tokens) + head_index
        # Either None or a list of feat-val dicts for tokens that require
        # them; convert plain dicts to Features objects.
        if isinstance(features, list):
            features = [Features(d) if d else None for d in features]
        self.features = features
        # Agr constraints: each a list of form
        # (node_index1, node_index2 . feature_pairs)
        self.agr = agr or None

    def __repr__(self):
        """Print name and entry id."""
        return '{}:{}'.format(self.name, self.id)

    @staticmethod
    def make_name(tokens):
        """Create a default group name by joining the token strings."""
        return '_'.join(tokens)

    # Serialization

    def to_dict(self):
        """Convert the group to a dictionary to be serialized in a yaml file."""
        d = Entry.to_dict(self)
        d['words'] = self.tokens
        # NOTE(review): self.features may contain Features objects at this
        # point; confirm they serialize as intended through yaml.dump.
        d['features'] = self.features
        d['agr'] = self.agr
        return d

    @staticmethod
    def from_dict(d, language, head):
        """Convert a dict (loaded from a yaml file) to a Group object."""
        return Group(d['words'], head=head, language=language,
                     features=d.get('features'), agr=d.get('agr'),
                     name=d.get('name', ''), trans=d.get('trans'))

    def match_nodes(self, snodes, head_sindex, verbosity=0):
        """Attempt to match the group tokens (and features) with snodes
        from a sentence.

        Returns a list, parallel to self.tokens, of lists of
        (snode_index, match_features) candidates, or False if any token
        fails to match."""
        match_snodes = []
        for index, token in enumerate(self.tokens):
            match_snodes1 = []
            feats = self.features[index] if self.features else None
            if verbosity:
                print("  Attempting to match {}".format(token))
            matched = False
            for node in snodes:
                if verbosity:
                    print("   Trying {}, token index {}, snode index {} head index {}".format(node, index, node.index, head_sindex))
                if index == self.head_index:
                    # This token is the head of the group
                    if node.index == head_sindex:
                        # This snode corresponds to the group head and was
                        # already selected during lexicalization
                        node_match = node.match(token, feats)
                        if node_match == False:
                            # The head has to match, so fail now
                            return False
                        match_snodes1.append((node.index, node_match))
                        if verbosity:
                            print("   Head matched already")
                        matched = True
                        # Don't look further for an snode to match this token
                        break
                else:
                    node_match = node.match(token, feats)
                    if verbosity:
                        print('   Node {} match {}:{}, {}:: {}'.format(node, token, index, feats, node_match))
                    if node_match != False:
                        match_snodes1.append((node.index, node_match))
                        if verbosity:
                            print("   Matched node {}".format(node))
                        matched = True
            if not matched:
                if verbosity:
                    print("   {} not matched; failed".format(token))
            return False
            match_snodes.append(match_snodes1)
        return match_snodes

    ### Translations

    ## Alignments: position correspondences, agreement constraints
    ## አድርጎ አያውቅም -> godhe hin beeku
    ## a: {positions: (1, 2),
    ##     agreements: {gen: gen},
    ##     featmaps: {((pers, 2), (num, 2)): ((pers, 3), (num, 2))}
    ##    }

    def add_alignment(self, trans):
        """Not yet implemented."""
        pass
+
class EntryError(Exception):
    """Raised when an attempt to update an entry fails."""

    def __init__(self, value):
        # The offending value or message; echoed back by __str__.
        self.value = value

    def __str__(self):
        return '{!r}'.format(self.value)
+
=======================================
--- /dev/null
+++ /hiiktuu/features.py Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,213 @@
+#
+# Hiiktuu features (dicts).
+#
+########################################################################
+#
+# This file is part of the HLTDI L^3 project
+# for parsing, generation, translation, and computer-assisted
+# human translation.
+#
+# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
+#
+# This program is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =========================================================================
+
+# 2014.04.19
+# -- Created.
+# 2014.04.23-24
+# -- Unification with one or both values sets.
+# Unification with a list/tuple of feature-value pairs.
+# Copying of agreement features: agree:().
+# 2014.05.05
+# -- More methods for agreement, needed in AgrSelection constraint.
+
class Features(dict):
    """A dict of grammatical features and values supporting unification
    and agreement.

    A value may be an atom (int, string) or a set of atoms representing
    a disjunction of possible values."""

    def __init__(self, dct):
        """Initialize from a plain dict of feature-value pairs."""
        dict.__init__(self, dct)

    def __repr__(self):
        pairs = ["{}={}".format(f, v) for f, v in self.items()]
        return "@{{{0}}}".format(','.join(pairs))

    def to_list(self):
        """Convert the features dict to a sorted list of (feat, value) pairs."""
        l = list(self.items())
        l.sort()
        return l

    @staticmethod
    def unify_sets(x, y):
        """Unify values x and y, at least one of which is a set.

        If both are sets, return their intersection; if only one is a
        set, return the other value if it is a member of the set.
        Return False on failure."""
        if isinstance(x, set):
            if isinstance(y, set):
                # BUG FIX: an empty intersection means no value satisfies
                # both disjunctions; it used to be returned as a (falsy but
                # non-False) empty set, which slipped past simple_unify.
                return (x & y) or False
            elif y in x:
                return y
        elif isinstance(y, set):
            if x in y:
                return x
        return False

    @staticmethod
    def simple_unify(x, y):
        """Unify the values x and y, returning the result or 'fail'."""
        # If they're the same, return one.
        if x == y:
            return x
        # If one or the other is a set, return the intersection
        # (a single value if the other is not a set)
        elif isinstance(x, set) or isinstance(y, set):
            u = Features.unify_sets(x, y)
            return 'fail' if u is False else u
        # Otherwise fail
        else:
            return 'fail'

    def unify(self, other):
        """Attempt to unify self with other (a Features object or dict).

        Returns a new Features object, or 'fail' on a value clash.
        NOTE(review): 'nil' is used as an absent-value sentinel, so a
        literal value 'nil' would be misread — confirm it never occurs."""
        result = Features({})
        for k in set(self.keys()) | set(other.keys()):
            # Check all of the keys of self and other
            self_val, other_val = self.get(k, 'nil'), other.get(k, 'nil')
            if self_val != 'nil':
                if other_val != 'nil':
                    # Both have a value for k; try to unify the values
                    u = Features.simple_unify(self_val, other_val)
                    if u == 'fail':
                        return 'fail'
                    result[k] = u
                else:
                    # Only self has a value for k
                    result[k] = self_val
            elif other_val != 'nil':
                # Only other has a value for k
                result[k] = other_val
        return result

    def agree(self, target, agrs):
        """Make target agree with self on the features in agrs
        (a dict or list of (source_feat, target_feat) pairs).

        Mutates target in place; returns 'fail' on a clash, otherwise
        None."""
        agr_pairs = agrs.items() if isinstance(agrs, dict) else agrs
        for src_feat, targ_feat in agr_pairs:
            if src_feat in self:
                src_value = self[src_feat]
                if targ_feat in target and target[targ_feat] != src_value:
                    # Clash; fail!
                    return 'fail'
                else:
                    target[targ_feat] = src_value

    def agrees(self, target, agrs):
        """Does target agree with self on the features in agrs
        (a dict or list of (source_feat, target_feat) pairs)?"""
        agr_pairs = agrs.items() if isinstance(agrs, dict) else agrs
        for src_feat, targ_feat in agr_pairs:
            if src_feat in self:
                src_value = self[src_feat]
                if targ_feat in target and target[targ_feat] != src_value:
                    # Clash; fail!
                    return False
        return True

    @staticmethod
    def all_agree(feats1, feats2, agrs):
        """Return all pairs from feats1 x feats2 that agree on agrs features."""
        pairs = []
        for feat1 in feats1:
            for feat2 in feats2:
                if feat1.agrees(feat2, agrs):
                    pairs.append((feat1, feat2))
        return pairs

    @staticmethod
    def n_agree(feats1, feats2, agrs):
        """Return the count of feats1 objects that agree with some feats2
        object and of feats2 objects that agree with some feats1 object."""
        f1agr = 0
        f2agr = 0
        for feat1 in feats1:
            for feat2 in feats2:
                if feat1.agrees(feat2, agrs):
                    f1agr += 1
                    break
        for feat2 in feats2:
            for feat1 in feats1:
                if feat1.agrees(feat2, agrs):
                    f2agr += 1
                    break
        return f1agr, f2agr

    @staticmethod
    def agree_with_none1(feats1, feats2, agrs):
        """Return all Features objects in feats1 that fail to agree with
        any object in feats2 on agrs features."""
        failures = []
        for feat1 in feats1:
            fails = True
            for feat2 in feats2:
                if feat1.agrees(feat2, agrs):
                    fails = False
                    break
            if fails:
                failures.append(feat1)
        return failures

    @staticmethod
    def agree_with_none2(feats1, feats2, agrs):
        """Return all Features objects in feats2 that fail to agree with
        any object in feats1 on agrs features."""
        failures = []
        for feat2 in feats2:
            fails = True
            for feat1 in feats1:
                if feat1.agrees(feat2, agrs):
                    fails = False
                    break
            if fails:
                failures.append(feat2)
        return failures

    def match_list(self, feat_list):
        """Does this Features object match the list/tuple of
        feature-value pairs?  Features absent from self match anything."""
        for feat, val in feat_list:
            if feat in self:
                if Features.simple_unify(val, self[feat]) == 'fail':
                    return False
        return True

    @staticmethod
    def unify_all(features_list):
        """Unify all of the Features objects (or None) in the list,
        returning the combined Features or 'fail'."""
        result = Features({})
        for features in features_list:
            if not features:
                continue
            result = result.unify(features)
            if result == 'fail':
                return 'fail'
        return result
+
=======================================
--- /dev/null
+++ /hiiktuu/language.py Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,449 @@
+#
+# Hiiktuu languages: dictionaries of lexical/grammatical entries
+#
+########################################################################
+#
+# This file is part of the HLTDI L^3 project
+# for parsing, generation, translation, and computer-assisted
+# human translation.
+#
+# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
+#
+# This program is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =========================================================================
+
+# 2014.02.09
+# -- Created
+# 2014.02.10
+# -- Made entries a separate class.
+# 2014.02.15
+# -- Methods for serializing and deserializing languages (using YAML).
+# 2014.03.24
+# -- Words, lexemes, and classes are all in the same dictionary
(self.words).
+# Lexemes start with %, classes with $.
+# 2014.04.17
+# -- Analysis and generation dicts for particular wordforms.
+# 2014.04.30
+# -- Eliminated entry types in lexicon other than Groups and forms.
+
+from .entry import *
+
+import os, yaml
+
+LANGUAGE_DIR = os.path.join(os.path.dirname(__file__), 'languages')
+
class Language:
    """A language: a lexicon of word forms and multi-word Groups, plus
    tables for generating word forms from lexemes and features."""

    # Abbreviations of all Language instances created so far.
    languages = []

    def __init__(self, name, abbrev,
                 words=None, lexemes=None, grams=None, classes=None,
                 mwes=None, groups=None, forms=None,
                 genforms=None):
        """Initialize dictionaries and names.

        lexemes, grams, classes, and mwes are accepted for backward
        compatibility but are currently unused."""
        self.name = name
        self.abbrev = abbrev
        # Words, lexemes, classes
        self.words = words or {}
        # Wordform analyses: {form: fv_dict or list of fv_dicts}
        self.forms = forms or {}
        # Groups indexed by head: {head: {feature_key: [group, ...]}}
        self.groups = groups or {}
        # Dict of groups with names as keys
        self.groupnames = {}
        # Record whether language has changed since last loaded
        self.changed = False
        # Dictionary of morphologically generated words:
        # {lexeme: {(feat, val) tuple: wordform, ...}, ...}
        self.genforms = genforms or {}
        Language.languages.append(abbrev)

    def __repr__(self):
        """Print name."""
        return '<<{}>>'.format(self.name)

    def to_dict(self):
        """Convert the language to a dictionary to be serialized as a
        yaml file."""
        d = {}
        d['name'] = self.name
        d['abbrev'] = self.abbrev
        if self.groups:
            groups = {}
            # NOTE(review): add_group_to_lexicon stores each value of
            # self.groups as a dict keyed by feature tuples, while this
            # iteration (and from_dict) treats it as a list of Groups;
            # iterating the dict would yield its keys. Confirm which
            # format is canonical.
            for head, v in self.groups.items():
                groups[head] = [g.to_dict() for g in v]
            d['groups'] = groups
        if self.forms:
            forms = {}
            for k, v in self.forms.items():
                # v is an fv dict or a list of fv dicts
                forms[k] = v
            # BUG FIX: forms was assembled but never stored in d, so all
            # wordforms were silently dropped on serialization.
            d['forms'] = forms
        return d

    def write(self, directory, filename=''):
        """Serialize the language to a yaml file in directory
        (named <abbrev>.lg unless filename is given)."""
        filename = filename or self.abbrev + '.lg'
        path = os.path.join(directory, filename)
        with open(path, 'w', encoding='utf8') as file:
            yaml.dump(self.to_dict(), file)

    @staticmethod
    def from_dict(d, reverse=True):
        """Convert a dict (loaded from a yaml file) to a Language object.

        If reverse is True, also build the generation (genforms) table."""
        l = Language(d.get('name'), d.get('abbrev'))
        l.possible = d.get('possible')
        groups = d.get('groups')
        if groups:
            l.groups = {}
            for head, v in groups.items():
                group_objs = [Group.from_dict(g, l, head) for g in v]
                l.groups[head] = group_objs
                # Add groups to groupnames dict
                for go in group_objs:
                    l.groupnames[go.name] = go
        forms = d.get('forms')
        if forms:
            l.forms = {}
            for k, v in forms.items():
                # v should be a dict or a list of dicts; convert any
                # features values to Features objects.
                if isinstance(v, dict):
                    if 'features' in v:
                        v['features'] = Features(v['features'])
                else:
                    for entry in v:
                        if 'features' in entry:
                            entry['features'] = Features(entry['features'])
                l.forms[k] = v
                if reverse:
                    # Add item to genform dict; segmented ('seg') entries
                    # have no single root and are skipped.
                    if isinstance(v, dict):
                        if 'seg' not in v:
                            l.add_genform(k, v['root'], v.get('features'))
                    else:
                        for entry in v:
                            l.add_genform(k, entry['root'], entry.get('features'))
        return l

    @staticmethod
    def read(path):
        """Create a Language from the contents of a yaml file."""
        with open(path, encoding='utf8') as file:
            # NOTE(review): yaml.load without an explicit Loader is unsafe
            # on untrusted input; these .lg files are assumed trusted.
            dct = yaml.load(file)
            return Language.from_dict(dct)

    @staticmethod
    def load(*abbrevs):
        """Load and return the languages with the given abbreviations,
        looking for <abbrev>.lg files in LANGUAGE_DIR.

        Returns None if any requested language file is missing."""
        languages = []
        for abbrev in abbrevs:
            path = os.path.join(LANGUAGE_DIR, abbrev + '.lg')
            try:
                language = Language.read(path)
                languages.append(language)
                print("Loading language {}".format(language))
            except IOError:
                print("That language doesn't seem to exist.")
                return
        return languages

    ### Basic setters.

    def add_form(self, form, dct, reverse=True):
        """Record an analysis dct (root, features, cats) for wordform form.

        If reverse is True, also add the form to the genforms dict."""
        if form not in self.forms:
            self.forms[form] = dct
        else:
            entry = self.forms[form]
            if isinstance(entry, dict):
                # Make this the second entry
                self.forms[form] = [entry, dct]
            else:
                # There are already two or more entries in a list
                entry.append(dct)
        if reverse:
            self.add_genform(form, dct['root'], dct['features'])

    def add_genform(self, form, lexeme, features):
        """Add the form to a lexeme- and feature-keyed dict."""
        featdict = self.genforms.setdefault(lexeme, {})
        # features is a Features object; convert it to a hashable
        # tuple of (feat, value) pairs
        featdict[tuple(features.to_list())] = form

    def add_group(self, tokens, head_index=-1, head='', name='', features=None):
        """Create a Group from tokens and file it in the lexicon,
        keyed by its head and the head's features (if any)."""
        group = Group(tokens, head_index=head_index, head=head,
                      language=self, name=name, features=features)
        if features:
            head_feats = features[tokens.index(group.head)]
        else:
            head_feats = None
        self.add_group_to_lexicon(group.head, group, head_feats)
        self.groupnames[group.name] = group
        self.changed = True
        return group

    def add_group_to_lexicon(self, head, group, features):
        """File group under head and a hashable key derived from
        features: () when there are none, otherwise an alphabetized
        tuple of fv pairs."""
        key = tuple(sorted(features.items())) if features else ()
        self.groups.setdefault(head, {}).setdefault(key, []).append(group)

    ### Generation of word forms

    def generate(self, root, features, verbosity=0):
        """Return the list of wordforms for root that match features.

        With no features, the root itself is the only form.  Returns
        None (after reporting) if the root is unknown."""
        if verbosity:
            print("Generating {}:{}".format(root, features))
        if not features:
            # Just return the "root"
            return [root]
        if root not in self.genforms:
            print("Impossible to generate root {}".format(root))
            return
        gendict = self.genforms[root]
        # List of matching forms
        result = [form for feat_list, form in gendict.items()
                  if features.match_list(feat_list)]
        if not result:
            print("No forms found for {}:{}".format(root, features))
        return result
+
class LanguageError(Exception):
    """Raised when an attempt to update the language fails."""

    def __init__(self, value):
        # The offending value or message; echoed back by __str__.
        self.value = value

    def __str__(self):
        return '{!r}'.format(self.value)
+
=======================================
--- /dev/null
+++ /hiiktuu/languages/amh.lg Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,64 @@
+name: አማርኛ
+abbrev: amh
+groups:
+ በላ:
+ - words: [$food, በላ]
+ features: [{case: acc}, False]
+ trans:
+ orm:
+ - [$food_nyaate,
+ {align: [0, 1],
+ agr: [false, {sp: prs, sn: num, sg: gen}]}]
+ አወቀ:
+ - words: [$vb, አወቀ]
+ features: [{tam: ger}, {tam: imf, pol: neg}]
+ agr: [[0, 1, [sp, sp], [sn, sn], [sg, sg]]]
+ name: ^አድርጎ^አያውቅም
+ trans:
+ orm:
+ - [^godhe_hin^beeku,
+ {align: [0, 2],
+ agr: [{sp: prs, sn: num, sg: gen},
+ {sp: prs, sn: num, sg: gen}]}]
+ - words: [$fact, አወቀ]
+ trans:
+ orm:
+ - [$fact_beeke,
+ {align: [0, 1],
+ agr: [False, {sp: prs, sn: num, sg: gen}]}]
+ አሳ:
+ - words: [አሳ]
+ trans:
+ orm:
+ - [qurxummii]
+ - [kalluuna]
+
+forms:
+ ያውቃል:
+ root: አወቀ
+ features: {tam: imf, pol: aff, sp: 3, sn: 0, sg: 0}
+ cats: [$vb]
+ አያውቅም:
+ root: አወቀ
+ features: {tam: imf, pol: neg, sp: 3, sn: 0, sg: 0}
+ cats: [$vb]
+ በልቶ:
+ root: በላ
+ features: {tam: ger, sp: 3, sn: 0, sg: 0}
+ cats: [$vb]
+ በልተው:
+ root: በላ
+ features: {tam: ger, sp: 3, sn: 1}
+ cats: [$vb]
+ በላ:
+ root: በላ
+ features: {tam: prf, sp: 3, sn: 0, sg: 0}
+ cats: [$vb]
+ ድንች:
+ root: ድንች
+ features: {num: 0, poss: 0, def: 0}
+ cats: [$food, $thing]
+ አሳ:
+ root: አሳ
+ features: {num: 0, poss: 0, def: 0}
+ cats: [$food, $animal, $thing]
=======================================
--- /dev/null
+++ /hiiktuu/languages/eng.lg Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,55 @@
+name: English
+abbrev: eng
+forms:
+ "it's":
+ seg:
+ - [it, {root: it, features: {num: 0, per: 3, gen: 2}, cats: [$pron]}]
+ - [is, {root: be, features: {tns: prs, per: 3, num: 0}, cats: [$aux,
$cop]}]
+ end:
+ root: end
+ features: {num: 0, prs: 3}
+ cats: [$abs]
+ boy:
+ root: boy
+ features: {num: 0, prs: 3}
+ cats: [$sbd]
+ act:
+ root: act
+ features: {num: 0, prs: 3}
+ cats: [$sth]
+ us:
+ root: we
+ features: {num: 1, prs: 1, case: 1}
+ cats: [$sbd]
+ them:
+ root: they
+ features: {num: 1, prs: 3, case: 1}
+ song:
+ root: song
+ features: {num: 0, prs: 3}
+ cats: [$singable]
+ sang:
+ root: sing
+ features: {tns: prt}
+ read:
+ root: read
+ features: {tns: prt}
+
+groups:
+ end:
+ - words: [the, end, of, the, world]
+ read:
+ - words: [read, $sbd, the, riot, act]
+ - words: [read, $sth]
+ give:
+ - words: [give, $sbd, a, piece, of, $ones, mind]
+ sing:
+ - words: [sing, $singable]
+ boy:
+ - words: [the, boy]
+ us:
+ - words: [us]
+ them:
+ - words: [them]
+ song:
+ - words: [a, song]
=======================================
--- /dev/null
+++ /hiiktuu/languages/orm.lg Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,44 @@
+name: afaan oromoo
+abbrev: orm
+groups:
+ nyaate:
+ - words: [$food, nyaate]
+ features: [{case: acc}, False]
+ beeke:
+ - words: [$vb, hin, beeke]
+ features: [{tam: cnt}, false, {tam: prs, pol: neg}]
+ name: ^godhe_hin^beeku
+ - words: [$fact, beeke]
+ qurxummii:
+ - words: [qurxummii]
+ kalluuna:
+ - words: [kalluuna]
+forms:
+ beeka:
+ root: beeke
+ features: {tam: prs, pol: aff, prs: 3, num: 0, gen: 0}
+ cats: [$vb]
+ beeku:
+ root: beeke
+ features: {tam: prs, pol: neg, prs: 3, num: 0, gen: 0}
+ cats: [$vb]
+ nyaate:
+ root: nyaate
+ features: {tam: cnt, prs: 3, num: 0, gen: 0}
+ cats: [$vb]
+ nyaatani:
+ root: nyaate
+ features: {tam: cnt, prs: 3, num: 1}
+ cats: [$vb]
+ dinnicha:
+ root: dinnicha
+ features: {num: 0, case: acc}
+ cats: [$food, $thing]
+ qurxummii:
+ root: qurxummii
+ features: {num: 0, case: acc}
+ cats: [$food, $animal, $thing]
+ kalluuna:
+ root: kalluuna
+ features: {num: 0, case: acc}
+ cats: [$food, $animal, $thing]
=======================================
--- /dev/null
+++ /hiiktuu/languages/spa.lg Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,67 @@
+name: español
+abbrev: spa
+
+groups:
+ cantar:
+ # cantar las cuarenta (with pronoun indirect object)
+ - words: [$algnp, cantar, las, cuarenta]
+ trans:
+ eng:
+ - [read_$sbd_the_riot_act,
+ {align: [1, 0, 2, 3],
+ agr: [false, {tmp: tns, num: num, prs: prs}, false, false]}]
+ # cantar (una canción)
+ - words: [cantar, $cantable]
+ trans:
+ eng:
+ - [sing_$singable,
+ {align: [0, 1],
+ agr: [{tmp: tns, num: num, prs: prs}, false]}]
+ canción:
+ - words: [una, canción]
+ trans:
+ eng:
+ - [a_song, {align: [0, 1]}]
+ canciones:
+ - words: [canciones]
+ les:
+ - words: [les]
+ trans:
+ eng:
+ - [them]
+ muchacho:
+ - words: [el, muchacho]
+ trans:
+ eng:
+ - [the_boy, {align: [0, 1]}]
+
+forms:
+ canción:
+ root: canción
+ features: {num: 0, prs: 3}
+ cats: [$cantable, $algo]
+ canciones:
+ root: canción
+ features: {num: 1, prs: 3}
+ cats: [$cantable, $algo]
+ les:
+ root: ellos
+ features: {num: 1, prs: 3, case: i}
+ cats: [$algnp]
+ nos:
+ root: nosotros
+ features: {num: 1, prs: 1, case: i}
+ cantó:
+ root: cantar
+ features: {num: 0, prs: 3, tmp: prt}
+ canta:
+ root: cantar
+ features: {num: 0, prs: 3, tmp: prs}
+ muchacho:
+ root: muchacho
+ features: {num: 0, prs: 3, gen: 0}
+ cats: [$algn]
+
+
+
+
=======================================
--- /dev/null
+++ /hiiktuu/sentence.py Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,1149 @@
+#
+# Hiiktuu sentences and how to parse and translate them.
+#
+########################################################################
+#
+# This file is part of the HLTDI L^3 project
+# for parsing, generation, translation, and computer-assisted
+# human translation.
+#
+# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
+#
+# This program is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =========================================================================
+
+# 2014.04.15
+# -- Created.
+# 2014.04.19-20
+# -- Group matching. GInst, GNode, and SNode classes.
+# 2014.04.22
+# -- Solution class.
+# 2014.04.26-7
+# -- Translation class and realization.
+# 2014.04.28
+# -- Variables for sentence analysis.
+# 2014.04.29-30
+# -- Fixed some constraints and initial variables values.
+# 2014.05.01
+# -- Handling source words that aren't "covered" (no group candidate
matches them):
+# variables and source constraints.
+# 2014.05.02
+# -- Uncovered source words: target ordering constraints.
+# Introduced "chunks": target output units that are not connected.
+# 2014.05.04-5
+# -- New agreement variables and constraints.
+
+import itertools
+# ui.py loads language, etc.
+from .ui import *
+from .cs import *
+
class Sentence:
    """A list of lexical tokens plus the variables and constraints that are
    set up over them and run during parsing or translation."""

    # Class-level counter giving each Sentence a unique id
    id = 0

    def __init__(self, raw='', language=None,
                 tokens=None, analyses=None,
                 nodes=None, groups=None, target=None,
                 verbosity=0):
        self.set_id()
        # The raw sentence string
        self.raw = raw
        # Source language: a language object
        self.language = language
        # Target language: a language object or None (parse only)
        self.target = target
        # SNode objects, one per token
        self.nodes = nodes or []
        # Candidate groups (GInst objects) found during lexicalization
        self.groups = groups or []
        # Level of diagnostic output
        self.verbosity = verbosity
        # All GNodes across all GInsts
        self.gnodes = []
        # Constraints to be run by the solver
        self.constraints = []
        # Root domain store for this sentence's variables
        self.dstore = DStore(name="S{}".format(self.id))
        # Sentence-level variables, keyed by name
        self.variables = {}
        # Constraint-satisfaction engine
        self.solver = Solver(self.constraints, self.dstore,
                             description='for group selection',
                             verbosity=verbosity)
        # Solutions found so far
        self.solutions = []
        if verbosity:
            print("Created Sentence object {}".format(self))

    def set_id(self):
        """Assign the next unused sentence id."""
        self.id = Sentence.id
        Sentence.id += 1

    def __repr__(self):
        """Print name."""
        if self.raw:
            return '|| ({}) {} ||'.format(self.id, self.raw)
        return '|| {} sentence {} ||'.format(self.language, self.id)

    def do(self, verbosity=0):
        """If there is a target language, translate; otherwise parse."""
        if self.target:
            print("Attempting to translate sentence {}".format(self))
        else:
            print("Attempting to parse sentence {}".format(self))
        if self.initialize(verbosity=verbosity):
            if self.solve(verbosity=verbosity):
                if self.target:
                    for solution in self.solutions:
                        solution.translate(verbosity=verbosity)
                        print("Translations found:")
                        for translation in solution.translations:
                            translation.display()
            else:
                print("No solution found")

    def parse(self, verbosity=0):
        """Parse only: initialize and run the solver."""
        print("Attempting to parse {}".format(self))
        if self.initialize(verbosity=verbosity):
            self.solve(verbosity=verbosity)

    def initialize(self, verbosity=0):
        """Everything that must happen before constraint satisfaction.
        Returns False when no group matched, so nothing can be solved."""
        if verbosity:
            print("Initializing {}".format(self))
        self.tokenize(verbosity=verbosity)
        self.lexicalize(verbosity=verbosity)
        if not self.groups:
            print("No groups found for {}".format(self))
            return False
        self.create_variables(verbosity=verbosity)
        self.create_constraints(verbosity=verbosity)
        return True

    def solve(self, verbosity=0):
        """Run the constraints and, on success, record a single solution."""
        if verbosity:
            print("Attempting to find solutions for {}".format(self))
        if self.run(verbosity=verbosity):
            self.create_solution(verbosity=verbosity)
            if verbosity:
                print("Found solution {}".format(self.solutions[0]))
            return True
        if verbosity:
            print("No solution found")
        return False

    def tokenize(self, verbosity=0):
        """Segment the raw string into tokens, look each one up in the
        language's form dictionary, and create an SNode for each resulting
        token.  An entry's 'seg' field may expand one surface token into
        several nodes."""
        if verbosity:
            print("Tokenizing {}".format(self))
        if self.nodes:
            # Already tokenized
            return
        # Default tokenization: split at spaces (dedicated language-specific
        # tokenizers may be added later)
        index = 0
        for token in self.raw.split():
            entry = self.language.forms.get(token)
            if entry is None:
                # Unknown token: keep the raw string with no analysis
                self.nodes.append(SNode(token, index, None, self))
                index += 1
            elif isinstance(entry, dict):
                # A single (unambiguous) entry
                if 'seg' in entry:
                    # The form segments into several tokens
                    for tok, analysis in entry['seg']:
                        self.nodes.append(SNode(tok, index, analysis, self))
                        index += 1
                else:
                    self.nodes.append(SNode(token, index, entry, self))
                    index += 1
            else:
                # A list of dicts: ambiguous form; the SNode handles it
                self.nodes.append(SNode(token, index, entry, self))
                index += 1

    def lexicalize(self, verbosity=0):
        """Find and instantiate every group compatible with the sentence's
        tokens, creating GInst/GNode objects and recording, for each SNode,
        which GNodes could cover it."""
        if verbosity:
            print("Lexicalizing {}".format(self))
        if not self.nodes:
            print("Tokenization must precede lexicalization.")
            return
        candidates = []
        for node in self.nodes:
            # Keys into the group lexicon: surface token plus any roots
            keys = {node.token}
            analyses = node.analyses
            if analyses:
                if isinstance(analyses, list):
                    for analysis in analyses:
                        keys.add(analysis.get('root'))
                else:
                    keys.add(analyses.get('root'))
            # Candidate groups headed by any of these keys
            for key in keys:
                if key in self.language.groups:
                    for group in self.language.groups[key]:
                        # Reject groups with no translation in the target
                        # language (when translating)
                        if self.target and not group.get_translations(self.target.abbrev):
                            print("No translation for {}".format(group))
                            continue
                        candidates.append((node.index, group))
        # Filter: keep only groups whose words are all present in the
        # sentence, recording the matching snode indices for each
        matched = []
        for head_i, group in candidates:
            if verbosity > 1:
                print("Matching group {}".format(group))
            snodes = group.match_nodes(self.nodes, head_i)
            if not snodes:
                if verbosity > 1:
                    print("Failed to match")
                continue
            if verbosity > 1:
                print('Group {} matches snodes {}'.format(group, snodes))
            matched.append((head_i, snodes, group))
        # A GInst (with GNodes) for each surviving group
        self.groups = [GInst(group, self, head_i, snodes, index)
                       for index, (head_i, snodes, group) in enumerate(matched)]
        # Assign sentence-level indices to the GNodes and collect them
        sent_index = 0
        for group in self.groups:
            for gnode in group.nodes:
                gnode.sent_index = sent_index
                self.gnodes.append(gnode)
                sent_index += 1
        # Total number of GNodes
        self.ngnodes = sent_index
        # For each snode, which gnodes could cover it (uncovered ones get [])
        covered = {}
        for gnode in self.gnodes:
            for i in gnode.snode_indices:
                covered.setdefault(i, []).append(gnode.sent_index)
        for snode in self.nodes:
            snode.gnodes = covered.get(snode.index, [])

    ## Create IVars and (set) Vars with the sentence's domain store as root

    def ivar(self, name, domain, ess=False):
        self.variables[name] = IVar(name, domain, rootDS=self.dstore,
                                    essential=ess)

    def svar(self, name, lower, upper, lower_card=0, upper_card=MAX,
             ess=False):
        self.variables[name] = Var(name, lower, upper, lower_card, upper_card,
                                   essential=ess, rootDS=self.dstore)

    def create_variables(self, verbosity=0):
        """Create the sentence-level variables over groups, gnodes, and
        snode positions."""
        # Partition gnode indices into abstract (category) and concrete
        # (word/lexeme) sets
        catnodes = set()
        instnodes = set()
        for group in self.groups:
            for node in group.nodes:
                (catnodes if node.cat else instnodes).add(node.sent_index)
        # Snodes that could merge with category nodes
        merged_snodes = set()
        for gn_index in catnodes:
            merged_snodes.update(self.gnodes[gn_index].snode_indices)
        # Selected groups: at least 1, at most all
        self.svar('groups', set(), set(range(len(self.groups))),
                  1, len(self.groups), ess=True)
        # Selected gnodes: at least the size of the smallest group
        self.svar('gnodes', set(), set(range(self.ngnodes)),
                  min([len(g.nodes) for g in self.groups]), self.ngnodes)
        # Covered snodes (those with at least one candidate gnode): determined
        covered_snodes = {sn.index for sn in self.nodes if sn.gnodes}
        self.variables['snodes'] = DetVar('snodes', covered_snodes)
        # Category (abstract) gnodes used
        self.svar('catgnodes', set(), catnodes)
        # Instance gnodes merged with category gnodes
        self.svar('merged_gnodes', set(), instnodes, 0, len(catnodes))
        # Snodes where a merger happens (two associated gnodes)
        self.svar('merged_snodes', set(), merged_snodes, 0, len(catnodes))
        # Within-group position (precedence) pairs
        pos_pairs = set()
        for group in self.groups:
            pos_pairs.update(group.pos_pairs())
        self.svar('gnode_pos', set(), pos_pairs)
        # Variables belonging to SNodes, GInsts, and GNodes
        for snode in self.nodes:
            snode.create_variables()
        for ginst in self.groups:
            ginst.create_variables()
        for gnode in self.gnodes:
            gnode.create_variables()

    def create_constraints(self, verbosity=0):
        """Create the constraints relating sentence, group, and node
        variables."""
        if verbosity:
            print("Creating constraints for {}".format(self))
        # For each covered snode, its gnodes are the union of its concrete
        # and abstract gnodes
        for snode in self.nodes:
            if snode.gnodes:
                self.constraints.extend(Union([snode.variables['gnodes'],
                                               snode.variables['cgnodes'],
                                               snode.variables['agnodes']]).constraints)
        # Constraints for groups containing category (abstract) nodes
        for group in self.groups:
            if group.nanodes > 0:
                # The group's snode positions are the union of its concrete
                # and abstract node positions
                self.constraints.extend(Union([group.variables['gnodes_pos'],
                                               group.variables['agnodes_pos'],
                                               group.variables['cgnodes_pos']]).constraints)
                # Groups merged with this one including itself = groups
                # merged with it + the singleton of its own index
                self.constraints.extend(Union([group.variables['merged_groups_incl'],
                                               group.variables['merged_groups_excl'],
                                               DetVar('g{}'.format(group.index), {group.index})]).constraints)
                # Merged gnodes for the group: union of the merge candidates
                # of all its abstract gnodes
                self.constraints.append(UnionSelection(group.variables['merged_gnodes'],
                                                       group.variables['agnodes'],
                                                       [gn.variables['merge_cgn'] for gn in self.gnodes]))
                # Groups merged with this group: the groups of the gnodes
                # merged with its abstract nodes
                self.constraints.append(UnionSelection(group.variables['merged_groups_excl'],
                                                       group.variables['merged_gnodes'],
                                                       [DetIVar("gn{}->g".format(gn.sent_index), gn.ginst.index)
                                                        for gn in self.gnodes]))
                # Tree under this group: snodes of this group and of all
                # groups merged with it
                self.constraints.append(UnionSelection(group.variables['tree'],
                                                       group.variables['merged_groups_incl'],
                                                       [g.variables['gnodes_pos'] for g in self.groups]))
                for gnode in group.nodes:
                    if gnode.cat:
                        # Whatever an abstract gnode merges with must be a
                        # selected gnode
                        self.constraints.extend(Inclusion([gnode.variables['merge_cgn'],
                                                           self.variables['gnodes']]).constraints)
        # Category nodes used: union over the selected groups
        self.constraints.append(UnionSelection(self.variables['catgnodes'],
                                               self.variables['groups'],
                                               [g.variables['agnodes'] for g in self.groups]))
        # Merged gnodes used: union over the selected category nodes ...
        self.constraints.append(UnionSelection(self.variables['merged_gnodes'],
                                               self.variables['catgnodes'],
                                               [gn.variables['merge_cgn'] for gn in self.gnodes]))
        # ... and union over all merging snodes
        self.constraints.append(UnionSelection(self.variables['merged_gnodes'],
                                               self.variables['merged_snodes'],
                                               [sn.variables['mgnodes'] for sn in self.nodes]))
        # Category nodes used: union over merged snodes ...
        self.constraints.append(UnionSelection(self.variables['catgnodes'],
                                               self.variables['merged_snodes'],
                                               [sn.variables['agnodes'] for sn in self.nodes]))
        # ... and union over merged gnodes
        self.constraints.append(UnionSelection(self.variables['catgnodes'],
                                               self.variables['merged_gnodes'],
                                               [gn.variables['merge_agn'] for gn in self.gnodes]))
        # Merged snodes used: union over selected category nodes ...
        self.constraints.append(UnionSelection(self.variables['merged_snodes'],
                                               self.variables['catgnodes'],
                                               [gn.variables['merge_cw'] for gn in self.gnodes]))
        # ... and union over merged gnodes
        self.constraints.append(UnionSelection(self.variables['merged_snodes'],
                                               self.variables['merged_gnodes'],
                                               [gn.variables['merge_aw'] for gn in self.gnodes]))
        # Merged gnodes must be a subset of the selected gnodes
        self.constraints.extend(Inclusion([self.variables['merged_gnodes'],
                                           self.variables['gnodes']]).constraints)
        # Distinct snodes get distinct category nodes
        self.constraints.extend(Disjoint([sn.variables['agnodes'] for sn in self.nodes]).constraints)
        # Distinct concrete gnodes get distinct category nodes
        self.constraints.extend(Disjoint([gn.variables['merge_agn'] for gn in self.gnodes]).constraints)
        # All position (precedence) constraints for snodes
        self.constraints.append(PrecedenceSelection(self.variables['gnode_pos'],
                                                    [gn.variables['snodes'] for gn in self.gnodes]))
        # Position pairs: union of the pairs of all selected groups
        self.constraints.append(UnionSelection(self.variables['gnode_pos'],
                                               self.variables['groups'],
                                               [DetVar("g{}pos".format(g.index), g.pos_pairs())
                                                for g in self.groups]))
        # For each covered snode, the union of the snode indices of its
        # gnodes is exactly its own index
        gn2s = [gn.variables['snodes'] for gn in self.gnodes]
        s2gn = [s.variables['gnodes'] for s in self.nodes]
        for snode in self.nodes:
            if snode.gnodes:
                self.constraints.append(UnionSelection(DetVar("sn{}".format(snode.index), {snode.index}),
                                                       snode.variables['gnodes'],
                                                       gn2s))
        # Selected gnodes: union over covered snodes ...
        self.constraints.append(UnionSelection(self.variables['gnodes'],
                                               self.variables['snodes'],
                                               s2gn))
        # ... and union over selected groups
        self.constraints.append(UnionSelection(self.variables['gnodes'],
                                               self.variables['groups'],
                                               [g.variables['gnodes'] for g in self.groups]))
        # Covered snodes: union over selected gnodes
        self.constraints.append(UnionSelection(self.variables['snodes'],
                                               self.variables['gnodes'],
                                               [gn.variables['snodes'] for gn in self.gnodes]))
        # Positions of the concrete gnodes within each selected group
        self.constraints.append(ComplexUnionSelection(selvar=self.variables['groups'],
                                                      selvars=[g.variables['cgnodes_pos'] for g in self.groups],
                                                      seqvars=[s.variables['cgnodes'] for s in self.nodes],
                                                      mainvars=[g.variables['cgnodes'] for g in self.groups]))
        # Positions of the category gnodes within each selected group
        self.constraints.append(ComplexUnionSelection(selvar=self.variables['groups'],
                                                      selvars=[g.variables['agnodes_pos'] for g in self.groups],
                                                      seqvars=[s.variables['agnodes'] for s in self.nodes],
                                                      mainvars=[g.variables['agnodes'] for g in self.groups]))
        # Set convexity (projectivity) within each selected group's tree
        self.constraints.append(ComplexSetConvexity(self.variables['groups'],
                                                    [g.variables['tree'] for g in self.groups]))
        # Within-source agreement, for any groups that specify it
        if any([g.variables.get('agr') for g in self.groups]):
            self.constraints.append(ComplexAgrSelection(selvar=self.variables['groups'],
                                                        seqvars=[gn.variables['snodes'] for gn in self.gnodes],
                                                        featvars=[sn.variables['features'] for sn in self.nodes],
                                                        selvars=[g.variables.get('agr', EMPTY) for g in self.groups]))

    def run(self, verbosity=0):
        """Run constraint satisfaction, for now without search when no
        solution is found.  Returns the solver's status."""
        self.solver.run(verbosity=verbosity)
        if verbosity:
            print("Solver status after run: {}".format(self.solver.status))
        return self.solver.status

    def create_solution(self, dstore=None, verbosity=0):
        """Assuming the essential variables are determined in a domain
        store, make a Solution object from their values."""
        dstore = dstore or self.dstore
        # Indices of the selected groups
        groups = self.variables['groups'].get_value(dstore=dstore)
        ginsts = [self.groups[g] for g in groups]
        # GNode indices for each SNode
        s2gnodes = [node.variables['gnodes'].get_value(dstore=dstore)
                    for node in self.nodes]
        self.solutions.append(Solution(self, ginsts, s2gnodes,
                                       len(self.solutions)))
+
class SNode:
    """A sentence token together with its morphological analyses and its
    constraint variables."""

    def __init__(self, token, index, analyses, sentence):
        # Surface form (possibly the result of segmentation)
        self.token = token
        # Position within the sentence
        self.index = index
        # Normalize analyses to a list (or leave as None/empty)
        if analyses and not isinstance(analyses, list):
            analyses = [analyses]
        self.analyses = analyses
        # Back pointer to the sentence
        self.sentence = sentence
        # Categories, precomputed because matching happens repeatedly
        self.cats = self.get_cats()
        # Indices of candidate gnodes, filled in during lexicalization
        self.gnodes = None
        # Variables specific to this SNode
        self.variables = {}
        # Target-language tokens for this SNode
        self.translations = []

    def __repr__(self):
        """Print name."""
        return "*{}:{}".format(self.token, self.index)

    ## Variable creation helpers; the sentence's domain store is the root

    def ivar(self, key, name, domain, ess=False):
        self.variables[key] = IVar(name, domain, rootDS=self.sentence.dstore,
                                   essential=ess)

    def svar(self, key, name, lower, upper, lower_card=0, upper_card=MAX,
             ess=False):
        self.variables[key] = Var(name, lower, upper, lower_card, upper_card,
                                  rootDS=self.sentence.dstore, essential=ess)

    def lvar(self, key, name, lower, upper, lower_card=0, upper_card=MAX,
             ess=False):
        self.variables[key] = LVar(name, lower, upper, lower_card, upper_card,
                                   rootDS=self.sentence.dstore, essential=ess)

    def create_variables(self, verbosity=0):
        if not self.gnodes:
            # Nothing matched this snode: all of its set variables are empty
            self.variables['gnodes'] = EMPTY
            self.variables['cgnodes'] = EMPTY
            self.variables['agnodes'] = EMPTY
            self.variables['mgnodes'] = EMPTY
        else:
            # GNodes associated with this SNode: 0, 1, or 2
            self.svar('gnodes', "w{}->gn".format(self.index), set(),
                      set(range(self.sentence.ngnodes)),
                      0, 2, ess=True)
            # Concrete GNodes associated with this SNode: exactly 1
            self.svar('cgnodes', "w{}->cgn".format(self.index), set(),
                      {gn.sent_index for gn in self.sentence.gnodes if not gn.cat},
                      1, 1)
            # Abstract GNodes associated with this SNode: 0 or 1
            self.svar('agnodes', "w{}->agn".format(self.index), set(),
                      {gn.sent_index for gn in self.sentence.gnodes if gn.cat},
                      0, 1)
            # Merged concrete GNodes associated with this SNode: 0 or 1
            self.svar('mgnodes', "w{}->mgn".format(self.index), set(),
                      {gn.sent_index for gn in self.sentence.gnodes if not gn.cat},
                      0, 1)
        # Features variable; created for every snode since sentence-level
        # agreement constraints select over all nodes' features
        features = self.get_features()
        if len(features) > 1:
            self.lvar('features', 'w{}f'.format(self.index),
                      [], features, 1, 1)
        else:
            # Only one choice, so the features are determined
            self.variables['features'] = DetLVar('w{}f'.format(self.index), features)

    def get_cats(self):
        """The set of categories over all of the node's analyses, or None
        when there are no analyses."""
        if not self.analyses:
            return None
        cats = set()
        for analysis in self.analyses:
            cats.update(analysis.get('cats', []))
        return cats

    def get_features(self):
        """The list of possible Features objects for the SNode; an analysis
        with no features contributes an empty Features object."""
        features = []
        if self.analyses:
            for analysis in self.analyses:
                if 'features' in analysis:
                    features.append(analysis['features'])
                else:
                    features.append(Features({}))
        return features

    def match(self, item, features, verbosity=0):
        """Does this node match the group item (word, lexeme, or category)
        and any features associated with it?  Returns None for a featureless
        match, (root, features) for a match with unification, or False for
        failure."""
        if verbosity:
            print(' SNode {} with features {} trying to match item {} with features {}'.format(
                self, self.analyses, item, features))
        if Entry.is_cat(item):
            # Category item: the token itself is irrelevant
            if verbosity:
                print(' Cat item, looking in {}'.format(self.cats))
            if self.cats and item in self.cats:
                if not self.analyses or not features:
                    # Match with nothing to unify
                    return None
                for analysis in self.analyses:
                    node_features = analysis.get('features')
                    if node_features:
                        u_features = node_features.unify(features)
                        if u_features != 'fail':
                            return analysis.get('root'), u_features
                # No analysis unified
                return False
        elif self.token == item:
            # The surface token matches; features are irrelevant
            return None
        elif self.analyses:
            # Try each combination of root and analysis features
            for analysis in self.analyses:
                root = analysis.get('root', '')
                node_features = analysis.get('features')
                if root == item:
                    if not features:
                        return root, node_features
                    if not node_features:
                        return root, features
                    u_features = node_features.unify(features)
                    if u_features != 'fail':
                        return root, u_features
        # Fall-through: nothing matched
        # (reconstructed from a line-wrapped source; the final False is
        # placed at method level so every failed path reports failure)
        return False
+
class GInst:

    """Instantiation of a group within a particular sentence; holds the
    group's variables and its GNode objects."""

    def __init__(self, group, sentence, head_index, snode_indices, index):
        # The Group object that this "instantiates"
        self.group = group
        self.sentence = sentence
        self.target = sentence.target
        # Index of this GInst within the sentence's group list
        self.index = index
        # Index of the SNode associated with the group head
        self.head_index = head_index
        # One GNode per group token
        self.nodes = [GNode(self, i, indices) for i, indices in enumerate(snode_indices)]
        # Variables specific to this group instance
        self.variables = {}
        # Target-language translations (filled by set_translations())
        self.translations = []
        self.ngnodes = len(self.nodes)
        # Numbers of abstract (category) and concrete nodes
        self.nanodes = len([n for n in self.nodes if n.cat])
        self.ncgnodes = self.ngnodes - self.nanodes

    def __repr__(self):
        return '<<{}:{}>>'.format(self.group.name, self.group.id)

    def pos_pairs(self):
        """All pairs of gnode sentence indices, for precedence constraints."""
        gnode_pos = [gn.sent_index for gn in self.nodes]
        return set(itertools.combinations(gnode_pos, 2))

    def gnode_sent_index(self, index):
        """Convert a group-internal gnode index to its sentence-level index."""
        return self.nodes[index].sent_index

    def get_agr(self):
        """Return the group's agreement constraints as a set of tuples:
        (gnode1_sent_index, gnode2_sent_index, (feat, feat), ...).

        BUGFIX: the previous version iterated over a shallow copy of
        self.group.agr but assigned into the inner lists (a[0], a[1],
        a[2:]), destructively converting the shared Group's agreement spec
        to sentence indices.  A second call -- or a second GInst of the
        same Group -- would then convert already-converted indices.  This
        version builds fresh tuples and leaves self.group.agr untouched."""
        result = set()
        if self.group.agr:
            for a in self.group.agr:
                # Feature pairs as tuples; gnode positions converted to
                # sentence positions
                feats = tuple(tuple(pair) for pair in a[2:])
                result.add((self.gnode_sent_index(a[0]),
                            self.gnode_sent_index(a[1])) + feats)
        return result

    ## Variable creation helpers; the sentence's domain store is the root

    def ivar(self, key, name, domain, ess=False):
        self.variables[key] = IVar(name, domain, rootDS=self.sentence.dstore,
                                   essential=ess)

    def svar(self, key, name, lower, upper, lower_card=0, upper_card=None,
             ess=False):
        # upper_card defaults to MAX; it is late-bound (None sentinel) so
        # the class can be defined without MAX in scope at class-creation
        # time.  Behavior for all existing callers is unchanged.
        if upper_card is None:
            upper_card = MAX
        self.variables[key] = Var(name, lower, upper, lower_card, upper_card,
                                  rootDS=self.sentence.dstore,
                                  essential=ess)

    def create_variables(self, verbosity=0):
        """Create this group instance's variables, using determined
        variables wherever the value is already known."""
        ngroups = len(self.sentence.groups)
        nsnodes = len(self.sentence.nodes)
        # GNode indices for this GInst (determined)
        self.variables['gnodes'] = DetVar('g{}->gnodes'.format(self.index),
                                          {gn.sent_index for gn in self.nodes})
        # Abstract and concrete GNode indices (determined)
        if self.nanodes:
            self.variables['agnodes'] = DetVar('g{}->agnodes'.format(self.index),
                                               {gn.sent_index for gn in self.nodes if gn.cat})
            self.variables['cgnodes'] = DetVar('g{}->cgnodes'.format(self.index),
                                               {gn.sent_index for gn in self.nodes if not gn.cat})
        else:
            self.variables['agnodes'] = EMPTY
            self.variables['cgnodes'] = self.variables['gnodes']
        # SNode positions of this GInst's GNodes
        self.svar('gnodes_pos', 'g{}->gnodes_pos'.format(self.index),
                  set(), set(range(nsnodes)), self.ngnodes, self.ngnodes)
        # SNode positions of the abstract and concrete GNodes
        if self.nanodes == 0:
            # No abstract nodes
            self.variables['agnodes_pos'] = EMPTY
            self.variables['cgnodes_pos'] = self.variables['gnodes_pos']
        else:
            # One position per abstract node in the group
            self.svar('agnodes_pos', 'g{}->agnodes_pos'.format(self.index),
                      set(), set(range(nsnodes)), self.nanodes, self.nanodes)
            # One position per concrete node in the group
            self.svar('cgnodes_pos', 'g{}->cgnodes_pos'.format(self.index),
                      set(), set(range(nsnodes)), self.ncgnodes, self.ncgnodes)
        # GInsts merged with this one, excluding and including itself
        if self.nanodes == 0:
            # No abstract nodes, so merged only with itself (determined)
            self.variables['merged_groups_incl'] = DetVar('g{}->mgroups_in'.format(self.index),
                                                          {self.index})
            self.variables['merged_groups_excl'] = EMPTY
            self.variables['merged_gnodes'] = EMPTY
        else:
            self.svar('merged_groups_incl', 'g{}->mgroups_in'.format(self.index),
                      {self.index}, set(range(ngroups)), 1, self.nanodes+1)
            self.svar('merged_groups_excl', 'g{}->mgroups_ex'.format(self.index),
                      set(), set(range(ngroups)) - {self.index}, 0, self.nanodes)
            # Gnodes merged with this group's abstract gnodes; upper bound
            # is every gnode outside this group
            self.svar('merged_gnodes', 'g{}->mgnodes'.format(self.index),
                      set(), set(range(len(self.sentence.gnodes))) - {gn.sent_index for gn in self.nodes})
        # Tree under the GInst head (including the group itself)
        if self.nanodes == 0:
            # No abstract gnodes, so the tree is just the group's positions
            self.variables['tree'] = self.variables['gnodes_pos']
        else:
            self.svar('tree', 'g{}->tree'.format(self.index),
                      # at least as large as the number of this group's nodes
                      set(), set(range(nsnodes)), self.ngnodes, nsnodes)
        # Determined variable for within-source agreement constraints
        agr = self.get_agr()
        if agr:
            self.variables['agr'] = DetVar('g{}agr'.format(self.index), agr)

    def set_translations(self, verbosity=0):
        """Find the translations of the group in the target language and
        record, for each, the aligned (gnode, token, features, agreement,
        target-index) tuples plus any unaligned TNodes."""
        translations = self.group.get_translations(self.target.abbrev, False)
        # Supply a default (identity) alignment where one is missing
        for i, t in enumerate(translations):
            if len(t) == 1:
                translations[i] = [t[0], {'align': list(range(len(self.nodes)))}]
        ntokens = len(self.group.tokens)
        for tgroup, alignment in translations:
            if isinstance(tgroup, str):
                # Resolve a group name to the target Group object
                tgroup = self.target.groupnames[tgroup]
            # Make any TNodes required
            nttokens = len(tgroup.tokens)
            tnodes = []
            if nttokens > ntokens:
                # The target group has more nodes than the source group;
                # make TNodes for the target positions nothing aligns to
                full_t_indices = set(alignment['align'])
                empty_t_indices = set(range(nttokens)) - full_t_indices
                for i in empty_t_indices:
                    empty_t_token = tgroup.tokens[i]
                    empty_t_feats = tgroup.features[i] if tgroup.features else None
                    tnodes.append(TNode(empty_t_token, empty_t_feats, self, i))
            # Deal with the individual gnodes in the group
            gnodes = []
            for gn_index, gnode in enumerate(self.nodes):
                # Align gnodes with target tokens and features
                tokens = tgroup.tokens
                features = tgroup.features
                targ_index = alignment['align'][gn_index]
                if targ_index < 0:
                    # No target-language token for this gnode
                    continue
                agrs = alignment['agr'][gn_index] if 'agr' in alignment else None
                token = tokens[targ_index]
                feats = features[targ_index] if features else None
                gnodes.append((gnode, token, feats, agrs, targ_index))
            self.translations.append((tgroup, gnodes, tnodes))
+
class GNode:

    """A single node (word, position) within a GInst object."""

    def __init__(self, ginst, index, snodes):
        self.ginst = ginst
        # Position within the group
        self.index = index
        self.sentence = ginst.sentence
        # Candidate SNode indices and their analyses (root, unified features)
        self.snode_indices = [s[0] for s in snodes]
        self.snode_anal = [s[1] for s in snodes]
        # Whether this node is the head of the group
        self.head = index == ginst.group.head_index
        # The group word/lexeme/category at this position
        self.token = ginst.group.tokens[index]
        # Whether that token is abstract (a category)
        self.cat = Entry.is_cat(self.token)
        # Features for this position, when the group specifies any
        groupfeats = ginst.group.features
        self.features = groupfeats[index] if groupfeats else None
        self.variables = {}
        # NOTE: sent_index (the sentence-level gnode index) is assigned
        # externally, during Sentence.lexicalize()

    def __repr__(self):
        return "{}|{}".format(self.ginst, self.token)

    ## Variable creation helpers; the sentence's domain store is the root

    def ivar(self, key, name, domain, ess=False):
        self.variables[key] = IVar(name, domain, rootDS=self.sentence.dstore,
                                   essential=ess)

    def svar(self, key, name, lower, upper, lower_card=0, upper_card=MAX,
             ess=False):
        self.variables[key] = Var(name, lower, upper, lower_card, upper_card,
                                  rootDS=self.sentence.dstore,
                                  essential=ess)

    def create_variables(self, verbosity=0):
        nsnodes = len(self.sentence.nodes)
        # The SNode that this GNode is finally assigned to
        self.ivar('snodes', "gn{}->w".format(self.sent_index), set(self.snode_indices))
        if self.cat:
            # Concrete gnodes that may merge with this abstract node
            self.svar('merge_cgn', 'gn{}_cgmerge'.format(self.sent_index),
                      set(), {gn.sent_index for gn in self.sentence.gnodes if not gn.cat},
                      0, 1)
            # The snode where that merger takes place
            self.svar('merge_cw', 'gn{}_cwmerge'.format(self.sent_index),
                      set(), set(range(nsnodes)),
                      0, 1)
            self.variables['merge_agn'] = EMPTY
            self.variables['merge_aw'] = EMPTY
        else:
            # Abstract gnodes that may merge with this concrete node
            self.svar('merge_agn', 'gn{}_agmerge'.format(self.sent_index),
                      set(), {gn.sent_index for gn in self.sentence.gnodes if gn.cat},
                      0, 1)
            # The snode where that merger takes place
            self.svar('merge_aw', 'gn{}_awmerge'.format(self.sent_index),
                      set(), set(range(nsnodes)),
                      0, 1)
            self.variables['merge_cgn'] = EMPTY
            self.variables['merge_cw'] = EMPTY
+
class TNode:

    """A node in a target-language group that has no counterpart in the
    source-language group that the target group translates."""

    def __init__(self, token, features, ginst, index):
        self.ginst = ginst
        self.sentence = ginst.sentence
        self.index = index
        self.token = token
        self.features = features

    def generate(self, verbosity=0):
        """Generate forms for the TNode."""
        # Without features there is nothing to inflect: the token itself
        # is the only form.
        if not self.features:
            return [self.token]
        return self.sentence.target.generate(self.token, self.features)

    def __repr__(self):
        return "~{}|{}".format(self.ginst, self.token)
+
class Solution:

    """A non-conflicting set of groups for a sentence, at most one instance
    GNode for each sentence token, exactly one sentence token for each obligatory
    GNode in a selected group. Created when a complete variable assignment
    is found for a sentence."""

    def __init__(self, sentence, ginsts, s2gnodes, index):
        self.sentence = sentence
        # List of sets of gnode indices, one set per sentence node
        self.s2gnodes = s2gnodes
        self.ginsts = ginsts
        self.index = index
        # A list of pairs for each snode: (gnodes, features)
        self.snodes = []
        # List of Translation objects; multiple translations are possible
        # for a given solution because of multiple translations for groups
        self.translations = []

    def __repr__(self):
        return "|< {} >|({})".format(self.sentence.raw, self.index)

    def translate(self, verbosity=0):
        """Do everything you need to create the translation."""
        self.merge_nodes(verbosity=verbosity)
        for ginst in self.ginsts:
            ginst.set_translations(verbosity=verbosity)
        self.make_translations(verbosity=verbosity)

    def make_translations(self, verbosity=0):
        """Combine GInsts for each translation in translation products, and
        separate gnodes into a dict for each translation."""
        if verbosity:
            print("Making translations for {}".format(self))
        # Cartesian product: one combination for every choice of translation
        # for each group instance
        translations = itertools.product(*[g.translations for g in self.ginsts])
        for index, translation in enumerate(translations):
            t = Translation(self, translation, index, verbosity=verbosity)
            t.initialize(verbosity=verbosity)
            t.realize(verbosity=verbosity)
            self.translations.append(t)

    def merge_nodes(self, verbosity=0):
        """Merge the source features of cat and inst GNodes associated with each SNode."""
        if verbosity:
            print("Merging target nodes for {}".format(self))
        for snode, gn_indices in zip(self.sentence.nodes, self.s2gnodes):
            # gn_indices is either one or two ints indexing gnodes in self.gnodes
            gnodes = [self.sentence.gnodes[index] for index in gn_indices]
            features = []
            for gnode in gnodes:
#                print("gnode {}, snode_anal {}".format(gnode, gnode.snode_anal))
                # Find the analysis this gnode has for this particular snode
                snode_indices = gnode.snode_indices
                snode_index = snode_indices.index(snode.index)
                snode_anal = gnode.snode_anal[snode_index]
                if snode_anal:
#                    print("snode_anal {}".format(snode_anal))
                    features.append(snode_anal[1])
            # Could this fail??
            features = Features.unify_all(features)
            self.snodes.append((gnodes, features))
+
+class Translation:
+ """Representation of a single translation for an input sentence.
+ Multiple translations are possible with a single Solution."""
+
+ def __init__(self, solution, attribs, index, verbosity=0):
+ self.solution = solution
+ self.index = index
+ self.sentence = solution.sentence
+ self.verbosity = verbosity
+ # Create GNode dict and list of target group, gnodes and tnodes
+ # from attributes
+ self.gnode_dict = {}
+ self.groups_tnodes = []
+ for tgroup, tgnodes, tnodes in attribs:
+ for tgnode, tokens, feats, agrs, t_index in tgnodes:
+ self.gnode_dict[tgnode] = (tgroup, tokens, feats, agrs,
t_index)
+ self.groups_tnodes.append((tgroup, tnodes))
+ # form list / order constraint pairs for each sentence position
+ self.nodes = []
+ # Ordered units: merged groups or uncovered words
+ self.chunks = []
+ # pairs of node indices representing order constraints
+ self.order_pairs = []
+ # Root domain store for variables
+ self.dstore = DStore(name="T{}".format(self.index))
+ # Order variables for each node
+ self.variables = []
+ # Order and disjunction constraints
+ self.constraints = []
+ # Translation needs a solver to figure out positions of words
+ self.solver = Solver(self.constraints, self.dstore,
+ description='for target realization',
+ verbosity=verbosity)
+ # Final output
+ self.output = None
+
+ def __repr__(self):
+ return "{}[{}] ->".format(self.solution, self.index)
+
+ def display(self):
+ print("{} {}".format(self, self.out_string()))
+
+ def out_string(self):
+ '''Convert output to a string for pretty printing.'''
+ l = []
+ for word_list in self.output:
+ if len(word_list) == 1:
+ l.append(word_list[0])
+ else:
+ l.append('|'.join(word_list))
+ return ' '.join(l)
+
+ def initialize(self, verbosity=0):
+ """Set up everything needed to run the constraints and generate
the translation."""
+ if verbosity:
+ print("Initializing translation {}".format(self))
+ self.build(verbosity=verbosity)
+ self.set_chunks(verbosity=verbosity)
+ self.make_order_pairs(verbosity=verbosity)
+ self.create_variables(verbosity=verbosity)
+ self.create_constraints(verbosity=verbosity)
+
+ def build(self, verbosity=0):
+ """Unify translation features for merged nodes, map agr features
from source to target,
+ generate surface target forms from resulting roots and features."""
+ if verbosity:
+ print('Building {}'.format(self))
+ tginsts, tgnodes, trans_index = self.groups_tnodes,
self.gnode_dict, self.index
+ # Figure out the target forms for each snode
+ for snode, (gnodes, features) in zip(self.sentence.nodes,
self.solution.snodes):
+ if not gnodes:
+ # snode is not covered by any group
***The diff for this file has been truncated for email.***
=======================================
--- /dev/null
+++ /hiiktuu/ui.py Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,206 @@
+#
+# Hiiktuu UI: initial attempt at a user interface for creating languages
+#
+########################################################################
+#
+# This file is part of the HLTDI L^3 project
+# for parsing, generation, translation, and computer-assisted
+# human translation.
+#
+# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
+#
+# This program is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =========================================================================
+
+# 2014.02.15
+# -- Created
+# 2014.03.04
+# -- UI class
+# 2014.03.18
+# -- Adding groups
+
+from .language import *
+import os, sys
+
class UI:
    """Command-line user interface for creating and editing languages.
    Normally only one of these so doesn't have to be a class. Later a subclass
    of tkinter Frame?"""

    # Editing the grammar/lexicon
    edit_mode = 0
    # Parsing and translating
    proc_mode = 1

    def __init__(self):
        # Loaded languages, keyed by abbreviation
        self.languages = {}
        self.mode = UI.edit_mode

    @staticmethod
    def yes(response):
        """Interpret an empty response or one beginning with y/Y as 'yes'."""
        return not response or response[0].lower() == 'y'

    def load_language(self):
        """Prompt for a language abbreviation and load the language from
        LANGUAGE_DIR, recording it in self.languages. Return the Language,
        or None if the file doesn't exist."""
        abbrev = input("Give abbreviation for language.\n>> ")
        path = os.path.join(LANGUAGE_DIR, abbrev + '.lg')
        try:
            language = Language.read(path)
            self.languages[abbrev] = language
            return language
        except IOError:
            print("That language doesn't seem to exist.")
            return

    def quit(self):
        """Quit the UI (and L3Lite)."""
        # NOTE(review): project was renamed l3lite -> hiiktuu; this prompt
        # may need updating to match.
        response = input("Are you sure you want to quit L3Lite? ")
        if UI.yes(response):
            self.write_languages()
            sys.exit()

    def write_languages(self):
        """Write the languages the user wants to save.
        Bug fix: 'self' was missing from the signature, so the
        self.write_languages() call in quit() raised a TypeError."""
        for language in self.languages.values():
            if language.changed:
                response = input("{} has been changed; save?\n>> ".format(language.name))
                if UI.yes(response):
                    language.write(LANGUAGE_DIR)

    def add_word(self, language):
        """Prompt for a word and add it to language's lexicon, confirming
        first if a word with the same form is already there."""
        word = input("Write the word to be added to the lexicon.\n>> ")
        if word in language.words:
            response = input("There's already a word with that form in the lexicon; add another? ")
            if UI.yes(response):
                return self.add_word1(word, language)
            return
        else:
            return self.add_word1(word, language)

    def add_word1(self, word, language):
        """Optionally assign a class to word, then add it to language."""
        cls = None
        response = input("Do you want to assign a class to the word? ")
        if UI.yes(response):
            class_names = list(language.classes.keys())
            cls = input("Choose from these classes:\n{}\n>> ".format(' '.join(class_names)))
        return language.add_word(word, cls=cls)

    def add_class(self, language):
        """Prompt for a class name and add it to language, confirming first
        if a class with that name is already there."""
        name = input("Write the name of the class to be added to the lexicon.\n>> ")
        # Bug fix: was self.language.classes, but UI has no 'language'
        # attribute -- the language is the method parameter.
        if name in language.classes:
            response = input("There's already a class with that name in the lexicon; add a class with a different name? ")
            if UI.yes(response):
                return self.add_class1(name, language)
            return
        else:
            return self.add_class1(name, language)

    def add_class1(self, name, language):
        """Actually add the class to the language."""
        return language.add_class(name)

    def add_group(self, language):
        """Get the words that will be in the group. make_group() creates the group."""
        words = input(
            """Write the words, lexemes, or classes in the group in their typical order.
Precede any lexemes with % and any classes with $.
>> """)
        words = words.split()
        response = input("Are these the words you want in the group?\n{}\n".format(', '.join(words)))
        if UI.yes(response):
            return self.make_group(language, words)
        else:
            return self.add_group(language)

    def make_group(self, language, words, word_string=''):
        """Prompt for the head of the group, create the group in language,
        add its non-head words, then move on to dependency creation."""
        if not word_string:
            word_list = []
            for i, w in enumerate(words):
                word_list.append("[{}] {}".format(i+1, w))
            word_string = '\n'.join(word_list)
        head_index = input("Give the number of the word or lexeme that is the head of the group.\n{}\n>> ".format(word_string))
        # Robustness fix: also reject numbers below 1 ('0' previously slipped
        # through isdigit() and indexed words[-1]).
        if not head_index.isdigit() or int(head_index) < 1:
            print("You need to give a number between 1 and {}".format(len(words)))
            return self.make_group(language, words, word_string=word_string)
        else:
            head_index = int(head_index)
            if head_index > len(words):
                print("You need to give a number between 1 and {}".format(len(words)))
                return self.make_group(language, words, word_string=word_string)
            else:
                # Convert from 1-based user numbering to 0-based index
                head_index = head_index - 1
                head = words[head_index]
                name = '_'.join(words)
                print("OK, the head is '{}'".format(head))
                print("Creating group {} with head {}".format(name, head))
                group = language.add_group(name, head, head_lexeme=head.startswith(LEXEME_PRE), head_order=head_index)
                # A dictionary to associate order of words within the group with their IDs (indices).
                order2index = {head_index: 0}
                for index, word in enumerate(words):
                    if word == head:
                        continue
                    word_id = group.add_word(word, order=index)
                    order2index[index] = word_id
                response = input("Create dependencies among words?\n")
                if response:
                    return self.add_group_deps(group, word_string, order2index=order2index)
                else:
                    return self.add_group_deps(group, word_string, first=False, finished=True, order2index=order2index)

    def add_group_deps(self, group, word_string, first=True, finished=False, order2index=None):
        """Loop prompting for dependencies within group until the user is
        finished; then give every motherless non-head word a default
        dependency on the head and return the group."""
        if not first:
            if not finished:
                response = input("Finished with dependencies? ")
                if UI.yes(response):
                    finished = True
            if finished:
                for index, (lex, feats) in group.words.items():
                    # For each word in the group, make sure it's either
                    # the group head or that it has a mother within the
                    # group.
                    if index != 0 and 'm' not in feats:
                        print("Making word {} a daughter of head with default dependency".format(feats['o'] + 1))
                        group.add_dep(0, index)
                return group
            else:
                return self.add_group_dep(group, word_string, order2index=order2index)
        else:
            return self.add_group_dep(group, word_string, order2index=order2index)

    def add_group_dep(self, group, word_string, src_index=None, dest_index=None, order2index=None):
        """Prompt for source and destination word numbers and a dependency
        type, create the dependency in group, then return to the
        add_group_deps loop."""
        if src_index is None:
            src_index = input("Give the index of the source word for a dependency.\n{}\n>> ".format(word_string))
            # Robustness fix: reject numbers below 1 as well as above the range
            if not src_index.isdigit() or int(src_index) < 1 or int(src_index) > len(group.words):
                print("You need to give a number between 1 and {}".format(len(group.words)))
                return self.add_group_dep(group, word_string, order2index=order2index)
            else:
                src_index = int(src_index) - 1
        if dest_index is None:
            dest_index = input("Give the index of the destination word for the dependency.\n{}\n>> ".format(word_string))
            if not dest_index.isdigit() or int(dest_index) < 1 or int(dest_index) > len(group.words):
                print("You need to give a number between 1 and {}".format(len(group.words)))
                return self.add_group_dep(group, word_string, src_index=src_index, order2index=order2index)
            else:
                dest_index = int(dest_index) - 1
                dep = input("If you want a particular dependency type, enter it.\n>> ")
                if not dep:
                    dep = Entry.dflt_dep
                response = input("OK to create dependency of type {} from word {} to word {}?\n".format(dep, src_index + 1, dest_index + 1))
                if UI.yes(response):
                    s = order2index[src_index]
                    d = order2index[dest_index]
                    # Actually create the dependency
                    group.add_dep(s, d, dep=dep)
                return self.add_group_deps(group, word_string, first=False, order2index=order2index)
+
+
+
=======================================
--- /dev/null
+++ /hiiktuu/variable.py Tue May 6 07:09:41 2014 UTC
@@ -0,0 +1,862 @@
+#
+# Hiiktuu variables and domain stores: required for constraint
satisfaction.
+#
+########################################################################
+#
+# This file is part of the HLTDI L^3 project
+# for parsing, generation, translation, and computer-assisted
+# human translation.
+#
+# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
+#
+# This program is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =========================================================================
+
+# 2014.02.14
+# -- Created. Copied from l3xdg/variable.py.
+# 2014.03.26
+# -- One variable class (SVar from l3xdg).
+# 2014.05.04-5
+# -- List variables; needed so they can include non-hashable elements,
+# in particular, dicts and Features objects: LVar, DetLVar
+
+# Maximum number of values for a variable.
+MAX = 200
+# Maximum set of integers
+ALL = set(range(MAX))
+
class DStore:
    """Domain store holding domains for variables. (Really the domains are held in
    dicts kept by the variables.)"""

    def __init__(self, name='', level=0, problem=None, parent=None):
        """This store is a strengthening of parent store if there is one."""
        self.name = name
        self.level = level
        self.problem = problem
        self.parent = parent
        self.children = []
        # Undetermined variables
        self.undetermined = []
        # Essential undetermined variables
        self.ess_undet = []

    def __repr__(self):
        return '@ {}/{}'.format(self.name, self.level)

    def is_determined(self, essential=True, verbosity=0):
        """Are all variables in dstore determined that need to be determined?"""
        # When essential is True only essential variables matter;
        # otherwise all undetermined variables are checked.
        pending = self.ess_undet if essential else self.undetermined
        if pending:
            if verbosity:
                print('{} has {} undetermined variables'.format(self, len(pending)))
            return False
        return True

    def clone(self, constraint=None, name='', project=False, verbosity=0):
        """Create a new dstore by applying the basic constraint
        to the bindings in this store."""
        child = DStore(name=name or self.name, level=self.level + 1,
                       problem=self.problem, parent=self)
        self.children.append(child)
        # Start from copies of this store's undetermined lists
        child.undetermined = list(self.undetermined)
        child.ess_undet = list(self.ess_undet)
        constraint.infer(dstore=child, verbosity=0, tracevar=[])
        for variable in constraint.variables:
            # See if the variable is now determined in the new store
            variable.determined(dstore=child, verbosity=0)
        return child

DS0 = DStore(name='top')
+
class Var:
    """Set-valued constraint variable. For each domain store the variable
    keeps a lower bound set, an upper bound set, lower and upper cardinality
    bounds, and (once determined) a value."""

    # Threshold for "peripheral" variables
    weight_thresh = .5

    def __init__(self, name,
                 lower_domain=None, upper_domain=None,
                 lower_card=0, upper_card=MAX,
                 problem=None, dstores=None, rootDS=None,
                 constraint=None,
                 # Whether a complete solution depends on a single value for this variable
                 essential=True,
                 # Vars with low weights are "peripheral".
                 weight=1):
        self.name = name
        self.problem = problem
#        if problem:
#            self.problem.add_variable(self)
        self.constraints = [constraint] if constraint else []
        self.essential = essential
        self.value = None
        # Normally initialize with a top-level domain store
        self.rootDS = rootDS or DS0
        # Values of this variable in different domain stores
        self.dstores = dstores or {self.rootDS: {}}
        # Add the variable to the list of undetermined variables for
        # the dstore
        self.rootDS.undetermined.append(self)
        if essential:
            self.rootDS.ess_undet.append(self)
        self.weight = weight
        # Idiom fix: test "is not None" rather than "!= None" -- an empty
        # set is a legitimate, falsy domain.
        if lower_domain is not None:
            self.lower_domain = lower_domain
        else:
            self.lower_domain = set()
        if upper_domain is not None:
            self.upper_domain = upper_domain
        else:
            self.upper_domain = ALL.copy()
        self.init_lower_card = max(lower_card, len(self.lower_domain))
        self.init_upper_card = min(upper_card, len(self.upper_domain))
        self.max = MAX
        self.init_values(dstore=self.rootDS)

    def __repr__(self):
        return '${}'.format(self.name)

    # Initializing bounds

    def init_values(self, dstore=None):
        """Record the initial bounds and an undetermined value in dstore."""
        self.set_lower(self.lower_domain, dstore=dstore)
        self.set_upper(self.upper_domain, dstore=dstore)
        self.set_lower_card(self.init_lower_card, dstore=dstore)
        self.set_upper_card(self.init_upper_card, dstore=dstore)
        self.set_value(None, dstore=dstore)

    def set_lower(self, lower, dstore=None):
        self.set(dstore, 'lower', lower)

    def set_upper(self, upper, dstore=None):
        self.set(dstore, 'upper', upper)

    def set_lower_card(self, lower_card, dstore=None):
        self.set(dstore, 'lower_card', lower_card)

    def set_upper_card(self, upper_card, dstore=None):
        self.set(dstore, 'upper_card', upper_card)

    def get_name(self):
        '''Function used in sorting lists of variables.'''
        return self.name

    def get_dstore(self, dstore):
        """Returns the dictionary of value and domain(s) for dstore."""
        dstore = dstore or self.rootDS
        return self.dstores.get(dstore, {})

    def add_dstore(self, dstore):
        """Adds a domain store to the dstores dict."""
        self.dstores[dstore] = {}

    def set(self, dstore, feature, value):
        """Sets feature to be value in dstore, creating a dict for dstore if one doesn't exist."""
        dstore = dstore or self.rootDS
        dsdict = self.dstores.get(dstore, None)
        if dsdict is None:
            dsdict = {'value': None}
            self.dstores[dstore] = dsdict
        dsdict[feature] = value

    def set_value(self, value, dstore=None):
        """Sets the value of the variable in dstore."""
        self.set(dstore, 'value', value)

    def is_determined(self, dstore=None):
        """Is the variable already determined?"""
        return self.get_value(dstore=dstore) is not None

    def all_equal(self, variables, dstore=None):
        """Do all of these variables have the same value as this in dstore?"""
        return all([self.equals(var, dstore=dstore) for var in variables])

    def equals(self, var, dstore=None):
        """Does this variable have the same value as var in dstore?"""
        value = self.get_value(dstore=dstore)
        if value is not None:
            var_val = var.get_value(dstore=dstore)
            if var_val is None:
                return False
            if var.get_lower_card(dstore=dstore) > 1:
                return False
            if value == var_val:
                return True
        return False

    ## How constraints on a variable can fail

    def bound_fail(self, dstore=None):
        """Fail if the lower bound includes any elements not in the upper bound.
        Returns the offending elements; a non-empty (truthy) set means failure."""
        return self.get_lower(dstore=dstore) - self.get_upper(dstore=dstore)

    def card_fail(self, dstore=None):
        """Fail if the lower cardinality bound is greater than the upper cardinality bound."""
        return self.get_lower_card(dstore=dstore) > self.get_upper_card(dstore=dstore)

    def upper_bound_card_fail(self, dstore=None):
        """Fail if the length of upper bound < lower card."""
        return len(self.get_upper(dstore=dstore)) < self.get_lower_card(dstore=dstore)

    def lower_bound_card_fail(self, dstore=None):
        """Fail if length of lower bound > upper card."""
        return len(self.get_lower(dstore=dstore)) > self.get_upper_card(dstore=dstore)

    def fail(self, dstore=None):
        """Fail in one of three ways."""
        return self.bound_fail(dstore=dstore) or self.card_fail(dstore=dstore)
#            or self.bound_card_fail(dstore=dstore)

    ## Getters

    def get(self, dstore, feature, default=None):
        """Returns a value for feature associated with dstore, recursively
        checking dstore's parent if nothing is found."""
        dstore_dict = self.dstores.get(dstore, {})
        x = dstore_dict.get(feature, None)
        if x is not None:
            return x
        parent = dstore.parent
        if parent:
            return self.get(parent, feature, default=default)
        return default

    def get_value(self, dstore=None):
        """Return the value of the variable in dstore."""
        dstore = dstore or self.rootDS
        return self.get(dstore, 'value', None)

    def get_lower(self, dstore=None):
        dstore = dstore or self.rootDS
        return self.get(dstore, 'lower')

    def get_upper(self, dstore=None):
        dstore = dstore or self.rootDS
        return self.get(dstore, 'upper')

    def get_lower_card(self, dstore=None):
        dstore = dstore or self.rootDS
        return self.get(dstore, 'lower_card', 0)

    def get_upper_card(self, dstore=None):
        dstore = dstore or self.rootDS
        return self.get(dstore, 'upper_card', MAX)

    def get_undecided(self, dstore=None):
        """Returns the set of values that may or may not be in the variable."""
        dstore = dstore or self.rootDS
        return self.get_upper(dstore=dstore) - self.get_lower(dstore=dstore)

    def det_update(self, dstore=None):
        """Remove this variable from dstore's undetermined lists (it has
        just been determined)."""
        if dstore:
            dstore.undetermined.remove(self)
            if self.essential:
                dstore.ess_undet.remove(self)

    def determined(self, dstore=None, constraint=None, verbosity=0):
        """Attempt to determine the variable, returning the value if this is possible,
        False if it's not."""
        val = self.get_value(dstore=dstore)
        if val is not None:
            return val
        def determined_help(value, dst, verb):
            # Collapse both set bounds and both cardinality bounds onto
            # value, checking cardinality consistency first.
            value_card = len(value)
            lower_card = self.get_lower_card(dstore=dst)
            upper_card = self.get_upper_card(dstore=dst)
            if value_card < lower_card:
                s = "{} lowering lower card for {} to {}, less than previous value {}"
                raise(VarError(s.format(constraint, self, value_card, lower_card)))
            if value_card > upper_card:
                s = "{} raising upper card for {} to {}, greater than previous value {}"
                raise(VarError(s.format(constraint, self, value_card, upper_card)))
            self.set_value(value, dstore=dst)
            self.set_lower(value, dstore=dst)
            self.set_upper(value, dstore=dst)
            self.set_lower_card(value_card, dstore=dst)
            self.set_upper_card(value_card, dstore=dst)
            if verb > 1:
                print(' {} is determined at {}'.format(self, value))
            self.det_update(dstore=dst)
            return value
        lower = self.get_lower(dstore=dstore)
        upper = self.get_upper(dstore=dstore)
        if lower is None or upper is None:
            return False
        # If upper and lower bounds are equal, determine at either
        if lower == upper:
            return determined_help(lower, dstore, verbosity)
        # Combine cardinality and set bounds to determine
        # If the length of the upper bound is <= the lower cardinality bound,
        # then make the upper bound the value
        if len(upper) <= self.get_lower_card(dstore=dstore):
            return determined_help(upper, dstore, verbosity)
        if len(lower) >= self.get_upper_card(dstore=dstore):
            return determined_help(lower, dstore, verbosity)
        return False

    ## Methods that can change the variable's set bounds or cardinality bounds

    def determine(self, value, dstore=None, constraint=None):
        """Attempt to determine the variable as value, returning False if it can't be
        or if it's already determined."""
        if self.is_determined(dstore=dstore):
            return False
        value = value if isinstance(value, set) else {value}
        orig_upper = self.get_upper(dstore=dstore)
        orig_lower = self.get_lower(dstore=dstore)
        # (Removed an unused duplicate fetch of the upper bound.)
        if not value.issubset(orig_upper):
            # Var can't be determined at this value
            return False
        if constraint:
            print(' {} determining {} as {}'.format(constraint, self, value))
        val_card = len(value)
        self.set_lower(value, dstore=dstore)
        self.set_upper(value, dstore=dstore)
        self.set_value(value, dstore=dstore)
        self.set_lower_card(val_card, dstore=dstore)
        self.set_upper_card(val_card, dstore=dstore)
        if dstore and self in dstore.undetermined:
            self.det_update(dstore)
        if orig_upper != value or orig_lower != value:
            return True
        return False

    def strengthen_upper(self, upper2, dstore=None, constraint=None,
                         reduce=False, det=False):
        """Strengthens the upper bound by intersecting it with upper2.
        If det is True, attempt to determine variable."""
        upper = self.get_upper(dstore=dstore)
        if not isinstance(upper, set):
            print("{}'s upper {} is not set".format(self, upper))
        if not upper.issubset(upper2):
            new_upper = upper.intersection(upper2)
            lower_card = self.get_lower_card(dstore=dstore)
            if new_upper == upper:
                return False
            lower = self.get_lower(dstore=dstore)
            if not lower.issubset(new_upper) and constraint:
                s = 'Warning: attempting to change upper bound of {} to {}, which is not a superset of lower bound {}'
                print(s.format(self, new_upper, lower))
            if len(new_upper) < lower_card and constraint:
                s = 'Warning: attempting to change upper bound of {} to {}, which is smaller than lower card {}'
                print(s.format(self, new_upper, lower_card))
            if constraint:
                s = ' {} strengthening upper bound of {} ({}) with {}, now {}'
                print(s.format(constraint, self, upper, upper2, new_upper))
            self.set_upper(new_upper, dstore=dstore)
            if det:
                if new_upper == lower:
                    # Bounds have met: determine at the lower bound
#                    print('Determining', self)
                    val_len = len(lower)
                    self.set_value(lower, dstore=dstore)
                    self.set_lower_card(val_len, dstore=dstore)
                    self.set_upper_card(val_len, dstore=dstore)
                    if dstore and self in dstore.undetermined:
                        self.det_update(dstore)
                elif len(new_upper) == lower_card:
                    # Upper bound has shrunk to the minimum cardinality:
                    # it must be the value
                    val_len = lower_card
                    self.set_lower(new_upper, dstore=dstore)
                    self.set_value(new_upper, dstore=dstore)
                    self.set_upper_card(val_len, dstore=dstore)
                    if dstore and self in dstore.undetermined:
                        self.det_update(dstore)
            return True
        return False

    def discard_upper(self, value, dstore=None, constraint=None):
        """Discard set or element from upper bound."""
        upper = self.get_upper(dstore=dstore)
        value = value if isinstance(value, set) else {value}
        if value & upper:
            new_upper = upper - value
            new_upper_card = len(new_upper)
            lower = self.get_lower(dstore=dstore)
            if new_upper_card < len(lower) and constraint:
                s = 'Warning: attempting to discard {} from upper bound {} of {}, making it smaller than lower bound {}'
                print(s.format(value, upper, self, lower))
            lower_card = self.get_lower_card(dstore=dstore)
            if new_upper_card < lower_card:
                s = 'Warning: attempting to discard {} from upper bound {} of {}, making cardinality smaller than {}'
                print(s.format(value, upper, self, lower_card))
            # If value and upper overlap
            if constraint:
                print(' {} discarding {} from {}'.format(constraint, value, self))
            self.set_upper(new_upper, dstore=dstore)
            self.set_upper_card(new_upper_card, dstore=dstore)
            return True
        return False

    def strengthen_lower(self, lower2, dstore=None, constraint=None, det=False):
        """Strengthens the lower bound by unioning it with lower2."""
        lower = self.get_lower(dstore=dstore)
        if not lower.issuperset(lower2):
            new_lower = lower.union(lower2)
            upper = self.get_upper(dstore=dstore)
            upper_card = self.get_upper_card(dstore=dstore)
            if not new_lower.issubset(upper) and constraint:
                s = 'Warning: attempting to change lower bound of {} to {}, which is not a subset of upper bound {}'
                print(s.format(self, new_lower, upper))
            if len(new_lower) > upper_card and constraint:
                s = 'Warning: attempting to change lower bound of {} to {}, which is greater than upper card {}'
                print(s.format(self, new_lower, upper_card))
            if constraint:
                print(' {} strengthening lower bound of {} with {}'.format(constraint, self, lower2))
            self.set_lower(new_lower, dstore=dstore)
            if det:
                # Bug fix: was self.lower_card(dstore=dstore), which raised
                # AttributeError -- the accessor is get_lower_card.
                if new_lower == upper and upper_card == self.get_lower_card(dstore=dstore):
                    self.set_value(upper, dstore=dstore)
                    if dstore and self in dstore.undetermined:
                        self.det_update(dstore)
            return True
        return False

    def strengthen_lower_card(self, lower2, dstore=None, constraint=None, det=False):
        """Raises the lower bound on the cardinality of the set."""
        if lower2 > self.get_lower_card(dstore=dstore):
            if constraint:
                print(' {} raising lower cardinality bound of {} to {}'.format(constraint, self, lower2))
            self.set_lower_card(lower2, dstore=dstore)
            if det:
                upper_card = self.get_upper_card(dstore=dstore)
                if lower2 == upper_card:
                    upper = self.get_upper(dstore=dstore)
                    if len(upper) == upper_card:
                        # Determine
                        self.set_lower(upper, dstore=dstore)
                        self.set_value(upper, dstore=dstore)
                        if dstore and self in dstore.undetermined:
                            self.det_update(dstore)
            return True
        return False

    def strengthen_upper_card(self, upper2, dstore=None, constraint=None, det=False):
        """Lowers the upper bound on the cardinality of the set."""
        if upper2 < self.get_upper_card(dstore=dstore):
            if constraint:
                print(' {} lowering upper cardinality bound of {} to {}'.format(constraint, self, upper2))
            self.set_upper_card(upper2, dstore=dstore)
            if det:
                lower_card = self.get_lower_card(dstore=dstore)
                if upper2 == lower_card:
                    lower = self.get_lower(dstore=dstore)
                    if len(lower) == lower_card:
                        # Determine
                        self.set_upper(lower, dstore=dstore)
                        self.set_value(lower, dstore=dstore)
                        if dstore and self in dstore.undetermined:
                            self.det_update(dstore)
            return True
        return False

    ## Printing

    @staticmethod
    def string_range(lower, upper):
        """Display the bounds: elements of upper that are not (yet) in
        lower are parenthesized."""
        s = '{'
        for i, v in enumerate(upper):
            if i != 0:
                s += ','
            if v not in lower:
                s += '({})'.format(v)
            else:
                s += '{}'.format(v)
        return s + '}'

    def pretty_string(self, dstore=None, spaces=0, end='\n'):
        """Readable one-line display of the variable's bounds in dstore."""
        return '{0}${1}:{2}|{3},{4}|'.format(spaces*' ',
                                             self.name,
                                             Var.string_range(self.get_lower(dstore=dstore),
                                                              self.get_upper(dstore=dstore)),
                                             self.get_lower_card(dstore=dstore),
                                             self.get_upper_card(dstore=dstore))

    def pprint(self, dstore=None, spaces=0, end='\n'):
        print(self.pretty_string(dstore=dstore, spaces=spaces, end=end))
+
class IVar(Var):
    """Integer variable: a Var whose cardinality is pinned to exactly 1,
    so its value is a singleton set drawn from `domain`."""

    def __init__(self, name, domain=None,
                 problem=None, dstores=None, rootDS=None,
                 # Vars with low weights are "peripheral".
                 weight=1, essential=True):
        Var.__init__(self, name,
                     lower_domain=set(), upper_domain=domain,
                     lower_card=1, upper_card=1,
                     problem=problem, dstores=dstores, rootDS=rootDS,
                     weight=weight, essential=essential)

    def __repr__(self):
        return '#{}'.format(self.name)

    def equals(self, var, dstore=None, pattern=False):
        """Does this variable have the same value as var in dstore?
        var could be an IVar. False unless both are determined and equal.
        (Fixed: removed the unused is_ivar local and the identity-unsafe
        `!= None` comparisons; collapsed the redundant nested if.)"""
        value = self.get_value(dstore=dstore)
        var_val = var.get_value(dstore=dstore)
        return value is not None and var_val is not None and value == var_val

    def determined(self, dstore=None, constraint=None, verbosity=0):
        """Attempt to determine the variable, returning the value if this
        is possible, False if it's not."""
        val = self.get_value(dstore=dstore)
        if val is not None:
            return val
        upper = self.get_upper(dstore=dstore)
        if len(upper) == 1:
            # Exactly one candidate remains: fix both bounds and the value.
            self.set_value(upper, dstore=dstore)
            self.set_lower(upper, dstore=dstore)
            if verbosity > 1:
                print(' {} is determined at {}'.format(self, upper))
            if dstore:
                self.det_update(dstore)
            return upper
        return False

    def pretty_string(self, dstore=None, spaces=0, end='\n'):
        return '{0}#{1}:{2}'.format(spaces*' ',
                                    self.name,
                                    self.get_upper(dstore=dstore))
+
class LVar(Var):
    """Variable with list values (ordered, possibly with duplicates),
    rather than set values. Most Var methods work for both; the ones that
    rely on set operations are overridden here with list equivalents.

    Bug fixes relative to the original:
    - discard_upper tested ``any([x in upper] for x in value)`` — a
      generator of one-element lists, which is always truthy;
    - determine/strengthen_upper/strengthen_lower called ``.equals()`` on
      plain Python lists (AttributeError); replaced with _seq_equal;
    - strengthen_lower called ``self.lower_card(...)`` — the getter is
      ``get_lower_card``;
    - strengthen_lower's subset warning tested membership in the wrong
      direction (compare its own commented-out set version);
    - strengthen_upper warned whenever the upper bound was not a set,
      although LVar bounds are lists, so it fired on every call.
    """

    def __init__(self, name,
                 lower_domain=None, upper_domain=None,
                 lower_card=0, upper_card=MAX,
                 problem=None, dstores=None, rootDS=None,
                 constraint=None,
                 # Whether a complete solution depends on a single value for this variable
                 essential=True,
                 # Vars with low weights are "peripheral".
                 weight=1):
        Var.__init__(self, name,
                     lower_domain=lower_domain, upper_domain=upper_domain,
                     lower_card=lower_card, upper_card=upper_card,
                     problem=problem, dstores=dstores, rootDS=rootDS,
                     constraint=constraint,
                     essential=essential,
                     weight=weight)

    def __repr__(self):
        return 'L{}'.format(self.name)

    # Most methods work for both sets and lists

    @staticmethod
    def _seq_equal(seq1, seq2):
        """Set-style equality for two sequences (lists or sets): every
        element of each appears in the other; order and duplicates are
        ignored. Needed because bounds here are plain lists."""
        return all(x in seq2 for x in seq1) and all(x in seq1 for x in seq2)

    def equals(self, v):
        """We need something corresponding to set equality. The lists
        might be different lengths because of duplicates."""
        return LVar._seq_equal(self, v)

    def bound_fail(self, dstore=None):
        """Fail if the lower bound includes any elements not in the upper
        bound (tested here by cardinality, as in the original)."""
        return len(self.get_lower(dstore=dstore)) > len(self.get_upper(dstore=dstore))

    def get_undecided(self, dstore=None):
        """Returns the list of values that may or may not be in the variable."""
        dstore = dstore or self.rootDS
        return [x for x in self.get_upper(dstore=dstore)
                if x not in self.get_lower(dstore=dstore)]

    def determine(self, value, dstore=None, constraint=None):
        """Attempt to determine the variable as value, returning False if
        it can't be or if it's already determined."""
        if self.is_determined(dstore=dstore):
            return False
        value = value if isinstance(value, set) else {value}
        orig_upper = self.get_upper(dstore=dstore)
        orig_lower = self.get_lower(dstore=dstore)
        if not all([(x in orig_upper) for x in value]):
            # Var can't be determined at this value
            return False
        if constraint:
            print(' {} determining {} as {}'.format(constraint, self, value))
        val_card = len(value)
        self.set_lower(value, dstore=dstore)
        self.set_upper(value, dstore=dstore)
        self.set_value(value, dstore=dstore)
        self.set_lower_card(val_card, dstore=dstore)
        self.set_upper_card(val_card, dstore=dstore)
        if dstore and self in dstore.undetermined:
            self.det_update(dstore)
        # FIX: orig_upper/orig_lower are plain lists with no .equals().
        if not LVar._seq_equal(orig_upper, value) or not LVar._seq_equal(orig_lower, value):
            return True
        return False

    def strengthen_upper(self, upper2, dstore=None, constraint=None,
                         reduce=False, det=False):
        """Strengthens the upper bound by intersecting it with upper2
        (keeping upper2's order, as the original did).
        If det is True, attempt to determine variable.
        """
        upper = self.get_upper(dstore=dstore)
        if not isinstance(upper, list):
            # FIX: originally tested `not isinstance(upper, set)`, which
            # fired on every call since LVar bounds are lists.
            print("{}'s upper {} is not a list".format(self, upper))
        if not all([(x in upper2) for x in upper]):
            new_upper = [y for y in upper2 if y in upper]
            lower_card = self.get_lower_card(dstore=dstore)
            if new_upper == upper:
                return False
            lower = self.get_lower(dstore=dstore)
            if not all([(y in new_upper) for y in lower]):
                s = 'Warning: attempting to change upper bound of {} to {}, which is not a superset of lower bound {}'
                print(s.format(self, new_upper, lower))
            if len(new_upper) < lower_card and constraint:
                s = 'Warning: attempting to change upper bound of {} to {}, which is smaller than lower card {}'
                print(s.format(self, new_upper, lower_card))
            if constraint:
                s = ' {} strengthening upper bound of {} ({}) with {}, now {}'
                print(s.format(constraint, self, upper, upper2, new_upper))
            self.set_upper(new_upper, dstore=dstore)
            if det:
                # FIX: new_upper is a plain list with no .equals().
                if LVar._seq_equal(new_upper, lower):
                    # Upper bound has shrunk to the lower bound: determined.
                    val_len = len(lower)
                    self.set_value(lower, dstore=dstore)
                    self.set_lower_card(val_len, dstore=dstore)
                    self.set_upper_card(val_len, dstore=dstore)
                    if dstore and self in dstore.undetermined:
                        self.det_update(dstore)
                elif len(new_upper) == lower_card:
                    # Upper bound size hit the lower cardinality: determined.
                    val_len = lower_card
                    self.set_lower(new_upper, dstore=dstore)
                    self.set_value(new_upper, dstore=dstore)
                    self.set_upper_card(val_len, dstore=dstore)
                    if dstore and self in dstore.undetermined:
                        self.det_update(dstore)
            return True
        return False

    def discard_upper(self, value, dstore=None, constraint=None):
        """Discard list or element from upper bound."""
        upper = self.get_upper(dstore=dstore)
        value = value if isinstance(value, list) else [value]
        # FIX: was any([x in upper] for x in value) — always true.
        # If value and upper overlap
        if any(x in upper for x in value):
            new_upper = [y for y in upper if y not in value]
            new_upper_card = len(new_upper)
            lower = self.get_lower(dstore=dstore)
            if new_upper_card < len(lower) and constraint:
                s = 'Warning: attempting to discard {} from upper bound {} of {}, making it smaller than lower bound {}'
                print(s.format(value, upper, self, lower))
            lower_card = self.get_lower_card(dstore=dstore)
            if new_upper_card < lower_card:
                s = 'Warning: attempting to discard {} from upper bound {} of {}, making cardinality smaller than {}'
                print(s.format(value, upper, self, lower_card))
            if constraint:
                print(' {} discarding {} from {}'.format(constraint, value, self))
            self.set_upper(new_upper, dstore=dstore)
            self.set_upper_card(new_upper_card, dstore=dstore)
            return True
        return False

    def strengthen_lower(self, lower2, dstore=None, constraint=None,
                         det=False):
        """Strengthens the lower bound by unioning it with lower2."""
        lower = self.get_lower(dstore=dstore)
        if not all([(x in lower) for x in lower2]):
            # eliminate duplicates
            new_lower = lower + [y for y in lower2 if y not in lower]
            upper = self.get_upper(dstore=dstore)
            upper_card = self.get_upper_card(dstore=dstore)
            # FIX: membership direction was inverted (it checked
            # upper <= new_lower); the intended check — per the set
            # version this was ported from — is new_lower <= upper.
            if not all([(y in upper) for y in new_lower]):
                s = 'Warning: attempting to change lower bound of {} to {}, which is not a subset of upper bound {}'
                print(s.format(self, new_lower, upper))
            if len(new_lower) > upper_card and constraint:
                s = 'Warning: attempting to change lower bound of {} to {}, which is greater than upper card {}'
                print(s.format(self, new_lower, upper_card))
            if constraint:
                print(' {} strengthening lower bound of {} with {}'.format(constraint, self, lower2))
            self.set_lower(new_lower, dstore=dstore)
            if det:
                # FIXES: list has no .equals(); and self.lower_card is not
                # callable — the getter is get_lower_card().
                if LVar._seq_equal(new_lower, upper) and \
                   upper_card == self.get_lower_card(dstore=dstore):
                    self.set_value(upper, dstore=dstore)
                    if dstore and self in dstore.undetermined:
                        self.det_update(dstore)
            return True
        return False

    @staticmethod
    def string_range(lower, upper):
        """Render upper as a list-style string, parenthesizing elements
        not (yet) in lower."""
        s = '['
        for i, v in enumerate(upper):
            if i != 0:
                s += ','
            if v not in lower:
                s += '({})'.format(v)
            else:
                s += '{}'.format(v)
        return s + ']'

    def pretty_string(self, dstore=None, spaces=0, end='\n'):
        return '{0}L{1}:{2}|{3},{4}|'.format(spaces*' ',
                                             self.name,
                                             LVar.string_range(self.get_lower(dstore=dstore),
                                                               self.get_upper(dstore=dstore)),
                                             self.get_lower_card(dstore=dstore),
                                             self.get_upper_card(dstore=dstore))
+
+### Variables that are pre-determined.
+
class DetVar(Var):
    """A variable fixed at construction time. If a DStore is supplied it
    is determined only within that store; with no DStore it counts as
    determined in every store. Its bindings must never be modified."""

    def __init__(self, name, value, dstore=None):
        Var.__init__(self, name, rootDS=dstore)
        self.dstore = dstore
        if self.dstore:
            # Determine normally within the single given store.
            self.determine(value, dstore=dstore)
        else:
            # Store-independent: pin the value, both domains, and the
            # cardinality bounds directly on the instance.
            self.value = value
            self.lower_domain = value
            self.upper_domain = value
            self.set_cards(value)

    def __repr__(self):
        return '$!{}:{}'.format(self.name, self.value)

    def set_cards(self, value):
        """Fix both initial cardinality bounds at the size of value."""
        card = len(value)
        self.init_upper_card = card
        self.init_lower_card = card

    def init_values(self, dstore=None):
        """No per-store initialization needed: the value is fixed."""
        pass

    def set(self, dstore, feature, value):
        """Override set in Variable to prevent changes; warns and fails."""
        # This message should print out under some verbosity conditions.
        s = 'Warning: attempting to change pre-determined variable {}, feature: {}, value: {}, orig value: {}'
        print(s.format(self, feature, value, self.get(dstore, feature)))
        return False

    def is_determined(self, dstore=None):
        """Always determined, whatever the store."""
        return True

    def pretty_string(self, dstore=None, spaces=0, end='\n'):
        indent = spaces * ' '
        return '{0}$!{1}:{2}'.format(indent, self.name, self.get(dstore, 'value'))

    def cost(self, dstore=None):
        """A determined variable contributes nothing to search cost."""
        return 0

    def determined(self, dstore=None, verbosity=0, constraint=None):
        if self.dstore:
            # Store-bound: defer to the normal Var machinery.
            return Var.determined(self, dstore=dstore,
                                  verbosity=verbosity, constraint=constraint)
        return self.value

    def get(self, dstore, feature, default=None):
        if self.dstore:
            return Var.get(self, dstore, feature, default=default)
        # Store-independent lookups are answered from the fixed value.
        if feature in ('value', 'lower', 'upper'):
            return self.value
        if feature in ('lower_card', 'upper_card'):
            return len(self.value)
        # Any other feature falls through to None, as in the original.

    def get_undecided(self, dstore=None):
        """Nothing is ever undecided in a pre-determined variable."""
        return set()
+
class DetIVar(DetVar, IVar):
    """Pre-determined integer variable: DetVar semantics with IVar's
    singleton-set value representation.

    NOTE(review): __init__ calls IVar.__init__ first and then
    DetVar.__init__, each of which runs Var.__init__ — the DetVar call's
    bindings win for the pre-determined state. Order-sensitive; left
    exactly as written.
    """

    def __init__(self, name='', value=0, dstore=None):
        IVar.__init__(self, name, rootDS=dstore)
        # value could be the empty set
        if not isinstance(value, set):
            value = {value}
        DetVar.__init__(self, name, value, dstore)
        self.init_domain = value
        self.default_value = value

    def __repr__(self):
        # Shows an arbitrary member of the value set; assumes the set is
        # non-empty — TODO confirm the empty-set case allowed by __init__.
        return '#!{}:{}'.format(self.name, list(self.value)[0])

    def init_values(self, dstore=None):
        # Don't do anything
        pass

    def set_cards(self, value):
        # Integer variables always have cardinality exactly 1,
        # regardless of len(value) (unlike DetVar.set_cards).
        self.init_upper_card = 1
        self.init_lower_card = 1

    def pretty_string(self, dstore=None, spaces=0, end='\n'):
        return '{0}#!{1}:{2}'.format(spaces*' ', self.name,
                                     self.get(dstore, 'value'))

    def get(self, dstore, feature, default=None):
        """Feature lookup; store-independent requests answered locally."""
        if self.dstore:
            return IVar.get(self, dstore, feature, default=default)
        if feature == 'value':
            return self.value
        if feature in ('dom', 'upper', 'lower'):
            # Domain and bounds are the value itself, coerced to a set.
            if isinstance(self.value, set):
                return self.value
            else:
                return {self.value}
        if feature in ('lower_card', 'upper_card'):
            return 1
+
class DetLVar(DetVar):
    """List-valued counterpart of DetVar: pre-determined at construction,
    and determined in every DStore unless one was given. Must not be
    modified afterwards."""

    def __init__(self, name, value, dstore=None):
        DetVar.__init__(self, name, value, dstore=dstore)

    def __repr__(self):
        return 'L!{}:{}'.format(self.name, self.value)

    def pretty_string(self, dstore=None, spaces=0, end='\n'):
        indent = spaces * ' '
        return '{0}L!{1}:{2}'.format(indent, self.name, self.get(dstore, 'value'))

    def get_undecided(self, dstore=None):
        """List analogue of DetVar.get_undecided: nothing is undecided."""
        return []
+
class VarError(Exception):
    """Raised when an event cannot be executed on a variable."""

    def __init__(self, value):
        # Description (or offending value); rendered via repr in __str__.
        self.value = value

    def __str__(self):
        return '{!r}'.format(self.value)
+
# Constant variables, determined in all DStores.
# EMPTY is a shared pre-determined variable bound to the empty set
# (no DStore given, so it is determined everywhere).
EMPTY = DetVar("empty", set())
=======================================
--- /l3lite/__init__.py Sun Apr 20 07:07:10 2014 UTC
+++ /dev/null
@@ -1,5 +0,0 @@
-"""Do-it-yourself L3. Create simple bilingual lexicons and grammars for
language pairs."""
-
-__all__ =
['language', 'entry', 'ui', 'constraint', 'variable', 'sentence', 'features']
-
-from .sentence import *
=======================================
--- /l3lite/constraint.py Thu May 1 06:39:44 2014 UTC
+++ /dev/null
@@ -1,2677 +0,0 @@
-#
-# L3Lite constraints.
-#
-########################################################################
-#
-# This file is part of the HLTDI L^3 project
-# for parsing, generation, translation, and computer-assisted
-# human translation.
-#
-# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
-#
-# This program is free software: you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation, either version 3 of
-# the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# =========================================================================
-
-# 2014.03.27
-# -- Created. Initially just copied from l3xdg/constraint.py.
-# 2014.03.29
-# -- Fixed cant_precede() so it works with IVars (determined and not).
-# 2014.04.03
-# -- Created ComplexSetConvexity
-# 2014.04.05
-# -- Created ComplexUnionSelection
-# 2014.04.15
-# -- Constraint types used so far:
-# UnionSelection, PrecedenceSelection, ComplexUnionSelection,
-# ComplexSetConvexity, Union, Disjoint, Inclusion
-# 2014.04.26
-# -- Fixed several bugs in SetPrecedence (needed for TL sequencing).
-# 2014.04.30
-# -- Eliminated lots of unused constraints.
-# Fixed complex constraints so that sub-constraints are not recorded
-# in their variables.
-
-from .variable import *
-import itertools
-
-class Constraint:
-
- # Constants for outcome of running
- failed = 0
- entailed = 1
- sleeping = 2
-
- # Constant threshold for lenience
- lenience = .5
-
- def __init__(self, variables, problem=None, record=True, weight=1):
- self.variables = variables
- self.problem = problem
- self.weight = weight
- if record:
- for var in variables:
- if isinstance(var, DetVar):
- continue
-# if problem:
-# if var not in problem.vrs:
-# problem.vrs[var] = []
-# problem.vrs[var].append(self)
- var.constraints.append(self)
- self.name = ''
-
- def __repr__(self):
- return self.name
-
- def is_lenient(self):
- return self.weight < Constraint.lenience
-
- def set_weight(self, weight):
- self.weight = weight
-
- def get_var(self):
- """The single variable for this constraint."""
- return self.variables[0]
-
- # Each Constraint type must implement fails(), is_entailed(), and
infer().
-
- def fails(self, dstore=None):
- raise NotImplementedError("{} is an abstract
class".format(self.__class__.__name__))
-
- def is_entailed(self, dstore=None):
- raise NotImplementedError("{} is an abstract
class".format(self.__class__.__name__))
-
- def infer(self, dstore=None, verbosity=0, tracevar=None):
- """Should return state and variables that change."""
- raise NotImplementedError("{} is an abstract
class".format(self.__class__.__name__))
-
- def determine(self, dstore=None, verbosity=0, tracevar=None):
- """Try to determine each variable, returning the set if any
determined."""
- det = set()
- for variable in self.variables:
- if not variable.is_determined(dstore=dstore) and \
- variable.determined(dstore=dstore, constraint=self,
verbosity=verbosity) is not False:
- if verbosity and variable in tracevar:
- print(' {} determining {} at {}'.format(self,
variable, variable.get_value(dstore=dstore)))
- det.add(variable)
- return det
-
- def run(self, dstore=None, verbosity=0, tracevar=[]):
- """Run this constraint during constraint satisfaction."""
- if verbosity > 1:
- print(' Running {}'.format(self))
- determined = self.determine(dstore=dstore, verbosity=verbosity,
tracevar=tracevar)
- # Try to determine the variables; if any are determined, go to
sleep and return
- # the set of newly determined variables.
- if determined:
- if verbosity > 1:
- print(' Determined variables', determined)
- return Constraint.sleeping, determined
- # Otherwise see if the constraint fails. If it does fail and
return the empty set.
- if self.fails(dstore=dstore):
- if verbosity > 1:
- print(' Failed!')
- elif verbosity:
- print('{} failed; weight: {}'.format(self, self.weight))
- return Constraint.failed, set()
- # Otherwise see if the constraint is entailed. If it is, succeed
and return the empty set.
- if self.is_entailed(dstore=dstore):
- if verbosity > 1:
- print(' Entailed')
- return Constraint.entailed, set()
- # Otherwise try inferring variable values. Either succeed or sleep
and return any changed
- # variables.
- return self.infer(dstore=dstore, verbosity=verbosity,
tracevar=tracevar)
-
- @staticmethod
- def string_set(s):
- """Convenient print name for a set."""
- if len(s) > 10:
- return '{{{0}...{1}}}'.format(min(s), max(s))
- else:
- return '{}'.format(set.__repr__(s))
-
- def print_vars(self):
- '''Print out components of constraint variables.'''
- for v in self.variables:
- print('{} :: {}'.format(v, v.dstores))
-
-## Primitive basic constraints
-
-# Integer domains
-
-class Member(Constraint):
-
- def __init__(self, var, domain, problem=None, record=True):
- """
- var: an IVar
- domain: a set of ints
- """
- Constraint.__init__(self, (var,), problem=problem, record=record)
- self.domain = domain
- self.name = '{0}<{1}'.format(self.get_var(),
Constraint.string_set(self.domain))
-
- def fails(self, dstore=None):
- """Is the constraint domain not a superset of the variable's
domain?"""
- if not
self.domain.issubset(self.get_var().get_domain(dstore=dstore)):
- return True
- return False
-
- def is_entailed(self, dstore=None):
- """Is the variable's domain a subset of the constraint's domain?"""
- if self.get_var().get_domain(dstore=dstore).issubset(self.domain):
- return True
- return False
-
- def infer(self, dstore=None, verbosity=0, tracevar=None):
- """The variable's values are restricted to the intersection of
- their current values and the constraint's domain."""
- var = self.get_var()
- if var.strengthen(self.domain, dstore=dstore,
constraint=(verbosity>1 or var in tracevar) and self):
- return Constraint.entailed, {var}
- return Constraint.entailed, set()
-
-# Set domains
-
-class Superset(Constraint):
- """Set variable is constrained to be a superset of subset."""
-
- def __init__(self, var, subset, problem=None, record=True):
- """
- var: a SVar
- subset: a set of ints
- """
- Constraint.__init__(self, (var,), problem=problem, record=record)
- self.subset = subset
- self.name = '{0} >= {1}'.format(self.get_var(),
Constraint.string_set(self.subset))
-
- def fails(self, dstore=None):
- """Is the constraint subset not a subset of the var's upper
bound?"""
- if not
self.subset.issubset(self.get_var().get_upper(dstore=dstore)):
- return True
- return False
-
- def is_entailed(self, dstore=None):
- """Is the variable's lower bound a superset of the constraint's
subset?"""
- if self.get_var().get_lower(dstore=dstore).issuperset(self.subset):
- return True
- return False
-
- def infer(self, dstore=None, verbosity=0, tracevar=None):
- """The variable's values are restricted to be a superset of the
union
- of the current lower bound and subset."""
- var = self.get_var()
- if var.strengthen_lower(self.subset, dstore=dstore,
- constraint=(verbosity>1 or var in
tracevar) and self):
- return Constraint.entailed, {var}
- return Constraint.entailed, set()
-
-class Subset(Constraint):
- """Set variable is constrained to be a subset of superset."""
-
- def __init__(self, var, superset, problem=None, record=True):
- """
- var: a SVar
- superset: a set of ints
- """
- Constraint.__init__(self, (var,), problem=problem, record=record)
- self.superset = superset
- self.name = '{0} c= {1}'.format(self.get_var(),
Constraint.string_set(self.superset))
-
- def fails(self, dstore=None):
- """Is the var's lower bound not a subset of the constraint
superset?"""
- if not
self.get_var().get_lower(dstore=dstore).issubset(self.superset):
- return True
- return False
-
- def is_entailed(self, dstore=None):
- """Is the variable's upper bound a subset of the constraint's
superset?"""
- if self.get_var().get_upper(dstore=dstore).issubset(self.superset):
- return True
- return False
-
- def infer(self, dstore=None, verbosity=0, tracevar=None):
- """The variable's values are restricted to be a subset of the
intersection
- of the current upper bound and superset."""
- var = self.get_var()
- if var.strengthen_upper(self.superset, dstore=dstore,
constraint=(verbosity>1 or var in tracevar) and self):
- return Constraint.entailed, {var}
- return Constraint.entailed, set()
-
-### Set cardinality
-##
-##class CardinalityGEQ(Constraint):
-## """Set variable's cardinality is constrained to be >= lower bound."""
-##
-## def __init__(self, var, lower, problem=None):
-## Constraint.__init__(self, (var,), problem=problem)
-## self.lower = lower
-## self.name = '|{0}|>={1}'.format(self.get_var(), self.lower)
-##
-## def fails(self, dstore=None):
-## """Is the var's upper cardinality bound < lower?"""
-## if self.get_var().get_upper_card(dstore=dstore) < self.lower:
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Is the variable's lower cardinality bound already >= lower?"""
-## if self.get_var().get_lower_card(dstore=dstore) >= self.lower:
-## return True
-## return False
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=None):
-## """The variable's cardinality is restricted be >= lower: lower
bound
-## is raised if necessary."""
-## var = self.get_var()
-## if var.strengthen_lower_card(self.lower, dstore=dstore,
-## constraint=(verbosity>1 or var in
tracevar) and self):
-## return Constraint.entailed, {var}
-## return Constraint.entailed, set()
-##
-##class CardinalityLEQ(Constraint):
-## """Set variable's cardinality is constrained to be <= upper bound."""
-##
-## def __init__(self, var, upper, problem=None):
-## Constraint.__init__(self, (var,), problem=problem)
-## self.upper = upper
-## self.name = '|{0}| c= {1}'.format(self.get_var(), self.upper)
-##
-## def fails(self, dstore=None):
-## """Is the var's lower cardinality bound > upper?"""
-## if self.get_var().get_lower_card(dstore=dstore) > self.upper:
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Is the variable's upper cardinality bound already <= upper?"""
-## if self.get_var().get_upper_card(dstore=dstore) <= self.upper:
-## return True
-## return False
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=None):
-## """The variable's cardinality is restricted to be <= upper:
-## upper bound is lowered if necessary."""
-## var = self.get_var()
-## if var.strengthen_upper_card(self.upper, dstore=dstore,
-## constraint=(verbosity>1 or var in
tracevar) and self):
-## return Constraint.entailed, {var}
-## return Constraint.entailed, set()
-##
-##### Constraints that propagate
-##
-#### Primitive propagators
-##
-### Integer domain variables only
-##
-##class LessThan(Constraint):
-## """IVar1 is less than or equal to IVar2."""
-##
-## def __init__(self, variables, problem=None, weight=1):
-## Constraint.__init__(self, variables, problem=problem,
-## weight=weight)
-## self.name = '{0} <= {1}'.format(self.get_iv1(), self.get_iv2())
-##
-## def get_iv1(self):
-## return self.variables[0]
-##
-## def get_iv2(self):
-## return self.variables[1]
-##
-## def fails(self, dstore=None):
-## """
-## Fail if min of domain1 > max of domain2.
-## """
-## iv1 = self.get_iv1()
-## iv2 = self.get_iv2()
-## min1 = min(iv1.get_domain(dstore=dstore))
-## max2 = max(iv2.get_domain(dstore=dstore))
-## if min1 > max2:
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Entailed if max of domain1 <= min of domain2."""
-## iv1 = self.get_iv1()
-## iv2 = self.get_iv2()
-## max1 = max(iv1.get_domain(dstore=dstore))
-## min2 = min(iv2.get_domain(dstore=dstore))
-## if max1 <= min2:
-## return True
-## return False
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=None):
-## changed = set()
-## iv1 = self.get_iv1()
-## iv2 = self.get_iv2()
-## d1 = iv1.get_domain(dstore=dstore)
-## d2 = iv2.get_domain(dstore=dstore)
-## # iv2 must be between the min of iv1's domain and the maximum
value
-## iv2_values = set(range(min(d1), max(d2) + 1))
-## if iv2.strengthen(iv2_values, dstore=dstore,
-## constraint=(verbosity>1 or iv2 in tracevar)
and self):
-## changed.add(iv2)
-## # iv1 must be between the min of its domain and the max of iv2's
domain
-## # (iv2's domain may have changed)
-## iv1_values = set(range(min(d1),
max(iv2.get_domain(dstore=dstore)) + 1))
-## # Maximum value of sv2's upper bound constrains sv1's upper card
-## if iv1.strengthen(iv1_values, dstore=dstore,
-## constraint=(verbosity>1 or iv1 in tracevar)
and self):
-## changed.add(iv1)
-##
-## if verbosity > 1 and changed:
-## print(' Variables {} changed'.format(changed))
-## return Constraint.sleeping, changed
-##
-##class CardinalityEq(Constraint):
-## """Set variable's cardinality is constrained to be equal to value of
IVar."""
-##
-## def __init__(self, variables, problem=None, weight=1):
-## Constraint.__init__(self, variables, problem=problem,
-## weight=weight)
-## self.sv = variables[0]
-## self.iv = variables[1]
-## self.name = '|{0}| = {1}'.format(self.sv, self.iv)
-##
-## def fails(self, dstore=None):
-## """Is the sv's lower cardinality bound > max of iv's domain?"""
-## if self.iv.determined(dstore=dstore) is not False and
self.sv.determined(dstore=dstore) is not False:
-### print('Both vars determined: {}, {}'.format(self.iv,
self.sv))
-## if self.iv.get_value(dstore=dstore) !=
self.sv.get_upper_card(dstore=dstore):
-## return True
-## if self.sv.get_lower_card(dstore=dstore) >
max(self.iv.get_domain(dstore=dstore)):
-## return True
-## if min(self.iv.get_domain(dstore=dstore)) >
self.sv.get_upper_card(dstore=dstore):
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Is the variable's upper cardinality bound already = iv?"""
-## if self.iv.determined(dstore=dstore) is not False and
self.sv.determined(dstore=dstore) is not False:
-## if self.sv.get_upper_card(dstore=dstore) ==
self.iv.get_value(dstore=dstore):
-## return True
-## return False
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=None):
-## """sv's upper cardinality is restricted to be <= min of iv's
domain.
-## iv's domain is restricted to values >= lower cardinality of
sv."""
-## state = Constraint.sleeping
-## changed = set()
-## sv = self.sv
-## iv = self.iv
-## sv_low_card = sv.get_lower_card(dstore=dstore)
-## sv_up_card = sv.get_upper_card(dstore=dstore)
-## if iv.strengthen(set(range(sv_low_card, sv.max)), dstore=dstore,
-## constraint=(verbosity>1 or iv in tracevar) and
self):
-## changed.add(iv)
-## return state, changed
-## if iv.strengthen(set(range(0, sv_up_card + 1)), dstore=dstore,
-## constraint=(verbosity>1 or iv in tracevar) and
self):
-## changed.add(iv)
-## return state, changed
-## iv_dom = iv.get_domain(dstore=dstore)
-## if sv.strengthen_lower_card(min(iv_dom), dstore=dstore,
-## constraint=(verbosity>1 or sv in
tracevar) and self):
-## changed.add(sv)
-## return state, changed
-## if sv.strengthen_upper_card(max(iv_dom), dstore=dstore,
-## constraint=(verbosity>1 or sv in
tracevar) and self):
-## changed.add(sv)
-## return state, changed
-
-# Set domain variables only
-
-class SetConvexity(Constraint):
- """There must not be any 'holes' in the (single) set variable, which
represents
- the positions of the descendants of a node as well as that of the node
itself."""
-
- def __init__(self, var, problem=None, weight=1, record=True):
- """Only one variable, so a special constructor."""
- Constraint.__init__(self, [var], problem=problem, weight=weight,
record=record)
- self.var = self.variables[0]
- self.name = '{0} <>'.format(self.var)
-
- def fails(self, dstore=None):
- """Four ways to fail."""
- # If the variable is determined and has holes...
- if self.var.determined(dstore=dstore, constraint=self) is not
False:
- val = self.var.get_value(dstore=dstore)
- # There can't be any holes
- if val:
- val_range = set(range(min(val), max(val)+1))
- if val_range - val:
- return True
- lower_card = self.var.get_lower_card(dstore=dstore)
- lower = self.var.get_lower(dstore=dstore)
- upper = self.var.get_upper(dstore=dstore)
- if lower:
- # Necessary range includes all values between the minimum and
the maximum (inclusive)
- # of the lower bound
- neces_range = set(range(min(lower), max(lower)+1))
- if neces_range - upper:
- # If there's some value in necessary range not in upper
bound...
- return True
- # Possible values that are not in necessary range
- possible = upper - neces_range
- # If there's a gap separating max necessary and min possible
and too many possible
- # values would need to be discarded...
- if possible and neces_range:
- min_poss = min(possible)
- max_neces = max(neces_range)
- if min_poss - max_neces > 1:
- if len(upper) - len(possible) < lower_card:
- return True
- # If there is continuous sequence of integers as long as the lower
cardinality...
- if lower_card <= 1:
- return False
- upper_ordered = list(upper)
- upper_ordered.sort()
- last = upper_ordered[0]
- count = 1
- for pos in upper_ordered[1:]:
- if count >= lower_card:
- return False
- if pos - last > 1:
- count = 1
- last = pos
- else:
- count += 1
- last = pos
- if count >= lower_card:
- return False
- return True
-
- def is_entailed(self, dstore=None):
- """If the variable is determined, or if the lower bound is convex,
- and the upper only adds a single vowel below or above the lower
bound."""
- if self.var.determined(dstore=dstore, constraint=self) is not
False:
- return True
- lower = self.var.get_lower(dstore=dstore)
- upper = self.var.get_upper(dstore=dstore)
- if not lower:
- return False
- min_lower = min(lower)
- max_lower = max(lower)
- if not set(range(min_lower, max_lower+1)) - lower:
- if min_lower - min(upper) <= 1 and max(upper) - max_lower <= 1:
- return True
- return False
-
- def infer(self, dstore=None, verbosity=0, tracevar=[]):
- changed = set()
- # If the variable's lower bound is non-empty, every value between
- # the min and max of the lower bound must be in the variable, and
- # there can't be any gaps in the upper bound either.
- v = self.var
- lower = v.get_lower(dstore=dstore)
- if len(lower) > 0:
- upper = v.get_upper(dstore=dstore)
- min_low = min(lower)
- max_low = max(lower)
- # Make the lower bound everything between the min and max
- if v.strengthen_lower(set(range(min_low, max_low+1)),
- dstore=dstore, constraint=(verbosity>1
or v in tracevar) and self):
- changed.add(v)
- return Constraint.sleeping, changed
-
- # Look for gaps in the upper bound
- # Starting at the max of the lower bound...
- max_up = max(upper)
- x = max_low+1
- while x in upper and x < max_up:
- x += 1
- if x < max_up:
- if v.discard_upper(set(range(x, max_up+1)),
- dstore=dstore, constraint=(verbosity>1
or v in tracevar) and self):
- changed.add(v)
- return Constraint.sleeping, changed
- # Starting at the min of the lower bound...
- min_up = min(upper)
- x = min_low-1
- while x in upper and x > min_up:
- x -= 1
- if x > min_up + 1:
- if v.discard_upper(set(range(min_up, x)),
- dstore=dstore, constraint=(verbosity>1
or v in tracevar) and self):
- changed.add(v)
- return Constraint.sleeping, changed
-
- return Constraint.sleeping, changed
-
-class SupersetIntersection(Constraint):
- """Set var S1 is superset of intersection of set vars S2 and S3."""
-
- def __init__(self, variables, problem=None, weight=1, record=True):
- Constraint.__init__(self, variables, problem=problem,
- weight=weight, record=record)
- self.name = '{0} >= {1} ^ {2}'.format(self.variables[0],
self.variables[1], self.variables[2])
-
- def fails(self, dstore=None):
- """Is the intersection of the lower bounds of S2 and S3 not a
subset of
- the upper bound of S1?"""
- s1 = self.variables[0]
- s2 = self.variables[1]
- s3 = self.variables[2]
- s2_inters_s3 = s2.get_lower(dstore=dstore) &
s3.get_lower(dstore=dstore)
- if not s2_inters_s3 <= s1.get_upper(dstore=dstore):
- return True
- # Fail on cardinalities
- if s1.get_upper_card(dstore=dstore) < len(s2_inters_s3):
- return True
- return False
-
- def is_entailed(self, dstore=None):
- """Is the intersection of the upper bounds of S2 and S3 already a
subset of
- the lower bound of S1?"""
- s1 = self.variables[0]
- s2 = self.variables[1]
- s3 = self.variables[2]
- if s2.get_upper(dstore=dstore) & s3.get_upper(dstore=dstore) <=
s1.get_lower(dstore=dstore):
- return True
- return False
-
- def infer(self, dstore=None, verbosity=0, tracevar=[]):
- changed = set()
- # Intersection of lower bound of S2 and S3 is subset of lower
bound of S1.
- s1 = self.variables[0]
- s2 = self.variables[1]
- s3 = self.variables[2]
- if s1.strengthen_lower(s2.get_lower(dstore=dstore) &
s3.get_lower(dstore=dstore),
- dstore=dstore, constraint=(verbosity>1 or
s1 in tracevar) and self):
- changed.add(s1)
- # Upper bound of S2 and S3 excludes elements which are in the
lower bounds of S3 and S2, respectively,
- # but not in the upper bound of S1.
- s1_up = s1.get_upper(dstore=dstore)
- s2_not_s1 = s2.get_lower(dstore=dstore) - s1_up
- s3_not_s1 = s3.get_lower(dstore=dstore) - s1_up
- for x in s3.get_upper(dstore=dstore).copy():
- if x in s2_not_s1:
- if s3.discard_upper(x, dstore=dstore,
constraint=(verbosity>1 or s3 in tracevar) and self):
- changed.add(s3)
- for x in s2.get_upper(dstore=dstore).copy():
- if x in s3_not_s1:
- if s2.discard_upper(x, dstore=dstore,
constraint=(verbosity>1 or s2 in tracevar) and self):
- changed.add(s2)
- # Inference based on cardinalities (from Müller, p. 104)
- s2Us3_card = len(s2.get_upper(dstore=dstore) |
s3.get_upper(dstore=dstore))
- s1_up_card = s1.get_upper_card(dstore=dstore)
- s2_low_card = s2.get_lower_card(dstore=dstore)
- s3_low_card = s3.get_lower_card(dstore=dstore)
- if s1.strengthen_lower_card(s2_low_card + s3_low_card - s2Us3_card,
- dstore=dstore, constraint=(verbosity>1
or s1 in tracevar) and self):
- changed.add(s1)
- if s2.strengthen_upper_card(s2Us3_card + s1_up_card - s3_low_card,
- dstore=dstore, constraint=(verbosity>1
or s2 in tracevar) and self):
- changed.add(s2)
- if s3.strengthen_upper_card(s2Us3_card + s1_up_card - s2_low_card,
- dstore=dstore, constraint=(verbosity>1
or s3 in tracevar) and self):
- changed.add(s3)
- if verbosity > 1 and changed:
- print(' Variables {} changed'.format(changed))
- return Constraint.sleeping, changed
-
-class SubsetUnion(Constraint):
- """Set var S1 is subset of union of set vars S2 and S3."""
-
- def __init__(self, variables, problem=None, propagate=True,
- weight=1, record=True):
- Constraint.__init__(self, variables, problem=problem,
weight=weight, record=record)
- self.name = '{0} c= {1} U {2}'.format(self.variables[0],
self.variables[1], self.variables[2])
-
- def fails(self, dstore=None):
- """Is the union of the upper bounds of S2 and S3 (the biggest it
can be)
- not a superset of the lower bound of S1?"""
- s1 = self.variables[0]
- s2 = self.variables[1]
- s3 = self.variables[2]
- s2_union_s3 = s2.get_upper(dstore=dstore) |
s3.get_upper(dstore=dstore)
- if not s2_union_s3 >= s1.get_lower(dstore=dstore):
- return True
- # Fail on cardinalities
- if s1.get_lower_card(dstore=dstore) > len(s2_union_s3):
- return True
- return False
-
- def is_entailed(self, dstore=None):
- """Is the union of the lower bounds of S2 and S3 already a
superset of
- the upper bound of S1?"""
- s1 = self.variables[0]
- s2 = self.variables[1]
- s3 = self.variables[2]
- if s2.get_lower(dstore=dstore) | s3.get_lower(dstore=dstore) >=
s1.get_upper(dstore=dstore):
- return True
- return False
-
- def infer(self, dstore=None, verbosity=0, tracevar=[]):
- changed = set()
- # S1 must be a subset of the union of the upper bounds of S2 and S3
- s1 = self.variables[0]
- s2 = self.variables[1]
- s3 = self.variables[2]
- if s1.strengthen_upper(s2.get_upper(dstore=dstore) |
s3.get_upper(dstore=dstore),
- dstore=dstore, constraint=(verbosity>1 or
s1 in tracevar) and self):
- changed.add(s1)
- # S2's and S3's lower bounds must contain elements that are in the
lower bound of S1 but not
- # S3 and S2, respectively (note: Müller has *lower* bounds of S3
and S2 (Eq. 11.17, p. 105),
- # but this seems too strong).
- s1_not_s2 = s1.get_lower(dstore=dstore) -
s2.get_upper(dstore=dstore)
- s1_not_s3 = s1.get_lower(dstore=dstore) -
s3.get_upper(dstore=dstore)
- if s3.strengthen_lower(s1_not_s2, dstore=dstore,
constraint=(verbosity>1 or s3 in tracevar) and self):
- changed.add(s3)
- if s2.strengthen_lower(s1_not_s3, dstore=dstore,
constraint=(verbosity>1 or s2 in tracevar) and self):
- changed.add(s2)
- # Inference based on cardinalities (from Müller, p. 105, but
there's apparently
- # a typo; in Eq. 11.19, n1 should be the upper, not the lower
bound of S1)
- if s1.strengthen_upper_card(s2.get_upper_card(dstore=dstore) +
s3.get_upper_card(dstore=dstore),
- dstore=dstore, constraint=(verbosity>1
or s1 in tracevar) and self):
- changed.add(s1)
- if s2.strengthen_lower_card(s1.get_lower_card(dstore=dstore) -
s3.get_lower_card(dstore=dstore),
- dstore=dstore, constraint=(verbosity>1
or s2 in tracevar) and self):
- changed.add(s2)
- if s3.strengthen_lower_card(s1.get_lower_card(dstore=dstore) -
s2.get_lower_card(dstore=dstore),
- dstore=dstore, constraint=(verbosity>1
or s3 in tracevar) and self):
- changed.add(s3)
- if verbosity > 1 and changed:
- print(' Variables {} changed'.format(changed))
- return Constraint.sleeping, changed
-
-##class CardinalitySubset(Constraint):
-## """Cardinality of set variable 1 is within set variable 2. This
constraint is not included
-## in Müller, but it is needed for XDG valency.
-## It could be handled with IVMemberSV."""
-##
-## def __init__(self, variables, problem=None, weight=1):
-## Constraint.__init__(self, variables, problem=problem,
-## weight=weight)
-## self.name = '|{0}| c= {1}'.format(self.get_sv1(), self.get_sv2())
-##
-## def get_sv1(self):
-## return self.variables[0]
-##
-## def get_sv2(self):
-## return self.variables[1]
-##
-## def fails(self, dstore=None):
-## """Fail if minimum cardinality of SV1 is greater than maximum
possible value of SV2
-## or if maximum cardinality of SV1 is less than the minimum
possible value of SV2.
-## Fixed 2011.12.09: minimum possible value of SV2 is minimum of
*upper* bound, not
-## lower bound."""
-## sv1 = self.get_sv1()
-## sv2 = self.get_sv2()
-## upper2 = sv2.get_upper(dstore=dstore)
-## max2card = max(upper2) if upper2 else 0
-## if sv1.get_lower_card(dstore=dstore) > max2card:
-## return True
-### lower2 = sv2.get_lower(dstore=dstore)
-## min2card = min(upper2) if upper2 else 0
-## # min(lower2) if lower2 else 0
-## if sv1.get_upper_card(dstore=dstore) < min2card:
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Entailed if cardinality of SV1 determined, SV2 determined,
and the former is in the latter."""
-## sv1 = self.get_sv1()
-## sv2 = self.get_sv2()
-## if sv2.determined(dstore=dstore, constraint=self) is not False
and \
-## sv1.get_lower_card(dstore=dstore) ==
sv1.get_upper_card(dstore=dstore) in sv2.get_value(dstore=dstore):
-## return True
-## return False
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=None):
-## changed = set()
-## state = Constraint.sleeping
-## sv1 = self.get_sv1()
-## sv2 = self.get_sv2()
-## sv1_low_card = sv1.get_lower_card(dstore=dstore)
-## sv1_up_card = sv1.get_upper_card(dstore=dstore)
-### if tracevar in self.variables:
-### print(self, 'INFERRING')
-## # If sv1's cardinality is determined, then it must be in sv2
-## if sv1_low_card == sv1_up_card:
-### print('SV1 {} has same upper and lower card {}'.format(sv1,
sv1_low_card))
-## if sv2.strengthen_lower({sv1_low_card}, dstore=dstore,
-## constraint=(verbosity>1 or sv2 in
tracevar) and self):
-### constraint=self):
-### if sv2.determine({sv1_low_card}, dstore=dstore,
-### constraint=(verbosity>1 or sv2 in
tracevar) and self):
-## changed.add(sv2)
-## return state, changed
-##
-### if tracevar in self.variables:
-### print(self, 'GOT TO 0')
-##
-## sv2_upper = sv2.get_upper(dstore=dstore)
-### sv2_lower = sv2.get_lower(dstore=dstore)
-##
-## # Minimum value of sv2 constrains sv1's lower card
-## # Fixed 2011.12.09: minimum value of sv2 is min of *upper*
bound, not lower
-## if sv2_upper:
-## # Could be empty set, in which case no strengthening is
possible
-## if sv1.strengthen_lower_card(min(sv2_upper), dstore=dstore,
-## constraint=(verbosity>1 or sv1
in tracevar) and self):
-## changed.add(sv1)
-## return state, changed
-##
-### if tracevar in self.variables:
-### print(self, 'GOT TO 1')
-## # Maximum value of sv2's upper bound constrains sv1's upper card
-## upcard = max(sv2_upper) if sv2_upper else 0
-## if sv1.strengthen_upper_card(upcard, dstore=dstore,
constraint=(verbosity>1 or sv1 in tracevar) and self):
-## changed.add(sv1)
-## return state, changed
-### if tracevar in self.variables:
-### print(self, 'GOT TO 2')
-##
-## if verbosity > 1 and changed:
-## print(' Variables {} changed'.format(changed))
-## return state, changed
-
-class SetPrecedence(Constraint):
- """All elements of set variable 1 must precede all elements of set
variable 2."""
-
- def __init__(self, variables, problem=None, weight=1, record=True):
- Constraint.__init__(self, variables, problem=problem,
- weight=weight, record=record)
- self.name = '{0} << {1}'.format(self.variables[0],
self.variables[1])
-
- # Also used in PrecedenceSelection
-
- @staticmethod
- def must_precede(svar1, svar2, dstore=None):
- """Is the highest value that can occur in svar1 < the lowest value
that can occur in svar2?"""
- v1_upper = svar1.get_upper(dstore=dstore)
- v2_upper = svar2.get_upper(dstore=dstore)
- return v1_upper and v2_upper and (max(v1_upper) < min(v2_upper))
-
- @staticmethod
- def cant_precede(var1, var2, dstore=None):
- """Is the highest value that must occur in var1 >= the lowest
value that must occur in var2?"""
- # Lower
- if isinstance(var1, IVar):
- v1 = min(var1.get_upper(dstore=dstore))
- elif not var1.get_lower(dstore=dstore):
- return False
- else:
- v1 = max(var1.get_lower(dstore=dstore))
- # Upper
- if isinstance(var2, IVar):
- v2 = max(var2.get_upper(dstore=dstore))
- elif not var2.get_lower(dstore=dstore):
- return False
- else:
- v2 = min(var2.get_lower(dstore=dstore))
- return v1 >= v2
-# return v1_lower and v2_lower and (max(v1_lower) >= min(v2_lower))
-
- def fails(self, dstore=None):
- """Fail if any of set1's lower bound > any of set2's lower
bound."""
- return SetPrecedence.cant_precede(self.variables[0],
self.variables[1], dstore=dstore)
-
- def is_entailed(self, dstore=None):
- """Entailed if everything that can be in set1 precedes anything
that can be in set2."""
- return SetPrecedence.must_precede(self.variables[0],
self.variables[1], dstore=dstore)
-
- def infer(self, dstore=None, verbosity=0, tracevar=[]):
- changed = set()
- state = Constraint.sleeping
- v1 = self.variables[0]
- v1_low = v1.get_lower(dstore=dstore)
- v2 = self.variables[1]
- v2_low = v2.get_lower(dstore=dstore)
- # If the lower bound on v1 is not empty, v2 must be a subset of
- # {min(MAX, max(v1 + 1)), ..., MAX}
- if v1_low:
- v2_up_new = range(min([v1.max, max(v1_low) + 1]), v2.max+1)
- if v2.strengthen_upper(v2_up_new, dstore=dstore,
- constraint=(verbosity>1 or v2 in
tracevar) and self):
- changed.add(v2)
- return state, changed
- # If the lower bound on v2 is not empty, v1 must be a subset of
- # {0, ..., max(0, min(v2_low) - 1)}
- if v2_low:
- v1_up_new = range(0, max([0, min(v2_low) - 1]) + 1)
- if v1.strengthen_upper(v1_up_new, dstore=dstore,
- constraint=(verbosity>1 or v1 in
tracevar) and self):
- changed.add(v1)
- return state, changed
- # Remove all elements from v1 >= highest possible element in v2
- v1_up = v1.get_upper(dstore=dstore)
- v2_up = v2.get_upper(dstore=dstore)
- v2.max = max(v2_up)
- v1_over = set(itertools.filterfalse(lambda x: x < v2.max, v1_up))
- if v1_over:
- if v1.discard_upper(v1_over, dstore=dstore,
- constraint=(verbosity>1 or v1 in tracevar)
and self):
- changed.add(v1)
- return state, changed
- return state, changed
-
-### Integer domain and set domain variables
-##
-##class IVMemberSV(Constraint):
-## """Integer variable value must be member of set variable value."""
-##
-## def __init__(self, variables, problem=None, propagate=True,
-## weight=1):
-## Constraint.__init__(self, variables, problem=problem,
propagate=propagate,
-## weight=weight)
-## self.name = '{0} c {1}'.format(self.get_iv(), self.get_sv())
-##
-## def get_iv(self):
-## """The domain variable."""
-## return self.variables[0]
-##
-## def get_sv(self):
-## """The set variable."""
-## return self.variables[1]
-##
-## def fails(self, dstore=None):
-## """Fail if none of the IV values are in SV upper bound."""
-## iv = self.get_iv()
-## sv = self.get_sv()
-## iv_dom = iv.get_domain(dstore=dstore)
-## sv_up = sv.get_upper(dstore=dstore)
-## if len(iv_dom & sv_up) == 0:
-## return True
-## return False
-##
-## def is_entailed(self, dstore=None):
-## """Entailed if IV values are subset of SV lower bound."""
-## iv = self.get_iv()
-## sv = self.get_sv()
-## iv_dom = iv.get_domain(dstore=dstore)
-## sv_low = sv.get_lower(dstore=dstore)
-### if self.pattern:
-### # For patterns, the propagator is entailed if every element
in the domain of iv
-### # unifies with the lower bound of sv
-### if all([unify_fssets({tup}, sv_low) for tup in iv_dom]):
-### return True
-## if iv_dom <= sv_low:
-## return True
-## return False
-##
-## def infer(self, dstore=None, verbosity=0, tracevar=None):
-## changed = set()
-## iv = self.get_iv()
-## sv = self.get_sv()
-## # Constrain the values of IV to be within upper bound of SV
-## if iv.strengthen(sv.get_upper(dstore=dstore), dstore=dstore,
-## constraint=(verbosity>1 or iv in tracevar) and
self):
-## changed.add(iv)
-## # If IV is determined, constrain SV to include it
-## if iv.determined(dstore=dstore, verbosity=verbosity) is not
False:
-## if sv.strengthen_lower(iv.get_domain(dstore=dstore),
dstore=dstore,
-## constraint=(verbosity>1 or sv in
tracevar) and self):
-## changed.add(sv)
-## if verbosity > 1 and changed:
-## print(' Variables {} changed'.format(changed))
-## return Constraint.sleeping, changed
-
-# Selection constraint propagators
-
-class Selection(Constraint):
- """Superclass for most selection constraints.
-
- mainvar: set domain var or int domain var (set var for primitive
propagators)
- seqvars: set domain vars, int domain vars, constant sets, or constant
ints
- (set var for primitive propagators)
- selvar: set domain var or int domain var (set var for primitive
propagators)
- """
-
- def __init__(self, mainvar=None, selvar=None, seqvars=None,
- problem=None, weight=1, record=True):
- Constraint.__init__(self, [mainvar, selvar] + seqvars,
problem=problem,
- weight=weight, record=record)
- self.selvar = selvar
- self.mainvar = mainvar
- self.seqvars = seqvars
-
- def is_entailed(self, dstore=None):
- """Entailed only if all vars are determined.
- """
- if self.mainvar.determined(dstore=dstore, constraint=self) is not
False \
- and self.selvar.determined(dstore=dstore, constraint=self) is
not False \
- and all([v.determined(dstore=dstore, constraint=self) is not
False for v in self.seqvars]):
- return True
- return False
-
- def infer(self, dstore=None, verbosity=0, tracevar=None):
- """Some rules are common to all Selection subclasses."""
-
- changed = set()
- state = Constraint.sleeping
- seqvars = self.seqvars
- selvar = self.selvar
- mainvar = self.mainvar
-
- # If there is only one seqvar, then the main var is constrained to
be that value
- # and the selection var has to be {0} or 0
- if len(seqvars) == 1:
- # since there's only one seq var to select, the selection
variable has to
- # be {0} or 0
- if selvar.determine(0, dstore=dstore,
- constraint=(verbosity>1 or selvar in
tracevar) and self):
- changed.add(selvar)
- seqvar = seqvars[0]
- if seqvar.determined(dstore=dstore, verbosity=verbosity,
constraint=self) is not False:
- if mainvar.determine(seqvar.get_value(dstore=dstore),
dstore=dstore,
- constraint=(verbosity>1 or mainvar in
tracevar) and self):
- changed.add(mainvar)
- state = Constraint.entailed
- else:
- if
mainvar.strengthen_lower(seqvar.get_lower(dstore=dstore), dstore=dstore,
- constraint=(verbosity>1 or
mainvar in tracevar) and self):
- changed.add(mainvar)
- if
mainvar.strengthen_upper(seqvar.get_upper(dstore=dstore), dstore=dstore,
- constraint=(verbosity>1 or
mainvar in tracevar) and self):
- changed.add(mainvar)
-## if mainvar.determined(dstore=dstore,
verbosity=verbosity) is not False:
-## state = Constraint.entailed
- if changed:
- if verbosity > 1:
- print(' Variables {} changed'.format(changed))
- return state, changed
- # If all of the seqvars are equal to one another and determined
and the selection variable must
- # be non-empty, then the main var can be determined (as long as
the seqvar value is in its domain)
- if all([v.determined(dstore=dstore, verbosity=verbosity,
constraint=self) is not False for v in seqvars]) and \
- selvar.get_lower_card(dstore=dstore) > 0 and
seqvars[0].all_equal(seqvars[1:], dstore=dstore):
- seq0_val = seqvars[0].get_value(dstore=dstore)
- if mainvar.determine(seq0_val, dstore=dstore,
constraint=(verbosity>1 or mainvar in tracevar) and self):
- changed.add(mainvar)
- state = Constraint.entailed
- if verbosity > 1 and changed:
- print(' Variables {} changed'.format(changed))
- return state, changed
- # If the upper bound of selvar includes values that are greater
than the length of selvars,
- # then those values can be eliminated from the upper bound.
***The diff for this file has been truncated for email.***
=======================================
--- /l3lite/cs.py Fri May 2 22:53:47 2014 UTC
+++ /dev/null
@@ -1,164 +0,0 @@
-#
-# L3Lite CS: what is needed to implement l3 style constraint satisfaction
-# using the lexicon/grammars created.
-#
-########################################################################
-#
-# This file is part of the HLTDI L^3 project
-# for parsing, generation, translation, and computer-assisted
-# human translation.
-#
-# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
-#
-# This program is free software: you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation, either version 3 of
-# the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# =========================================================================
-
-# 2014.04.26
-# -- Created
-
-from .constraint import *
-
-class Solver:
- """A solver for a constraint satisfaction problem."""
-
- id = 0
-
- running = 0
- succeeded = 1
- failed = 2
- distributable = 3
- skipped = 4
-
- def __init__(self, constraints, dstore,
- description='', verbosity=0):
- self.constraints = constraints
- self.dstore = dstore
- # Used in solver's printname
- self.description = description
- self.verbosity=verbosity
- self.entailed = []
- self.failed = []
- self.status = Solver.running
- self.id = Solver.id
- Solver.id += 1
-
- def __repr__(self):
- return "Solver{} ({})".format(' ' + self.description if
self.description else '', self.id)
-
- def exit(self, result):
- if result == Constraint.failed:
- return True
- else:
- return self.fixed_point(result)
-
- def fixed_point(self, awaken, verbosity=0):
- if verbosity > 1:
- s = "# constraints to awaken: {}, # variables to determine: {}|
{}"
- print(s.format(len(awaken), len(self.dstore.ess_undet),
len(self.dstore.undetermined)))
- if self.dstore.is_determined():
- # All essential variables are determined
- self.status = Solver.succeeded
- return True
- elif len(awaken) == 0:
-# # No more constraints are awake
-# if self.dstore.is_determined():
-# # All variables are determined in the dstore or
peripheral: success
-# self.status = Solver.succeeded
-# else:
- # No more constraints apply: continue search
- # More variables to determine; we have to distribute
- self.status = Solver.distributable
- return True
- # Keep propagating
- return False
-
- def run(self, verbosity=0, tracevar=[]):
- """Run the constraints until CS fails or a fixed point is
reached."""
- if verbosity:
- s = "Running {} with {}|{} undetermined variables, {}
constraints"
- print(s.format(self, len(self.dstore.ess_undet),
len(self.dstore.undetermined), len(self.constraints)))
- awaken = set(self.constraints)
- it = 0
- while not self.exit(awaken):
- if verbosity > 1:
- print("Running iteration {}".format(it))
- awaken = self.run_constraints(awaken, verbosity=verbosity,
tracevar=tracevar)
- it += 1
-
- def run_constraints(self, constraints, verbosity=0, tracevar=[]):
- awaken = set()
- all_changed = set()
- for constraint in constraints:
-# print(" Running {}".format(constraint))
- state, changed_vars = constraint.run(dstore=self.dstore,
verbosity=verbosity, tracevar=tracevar)
- all_changed.update(changed_vars)
- if state == Constraint.entailed:
-# print(" Entailed")
- # Constraint is entailed; add it to the list of those.
- self.entailed.append(constraint)
- # Delete it from awaken if it's already there
- if constraint in awaken:
- awaken.remove(constraint)
-
- if state == Constraint.failed:
- if verbosity:
- print("FAILED {}".format(constraint))
- return Constraint.failed
-# # constraint fails; remove it from the entailed or awaken
lists if it's there
-# if constraint in self.entailed:
-# self.entailed.remove(constraint)
-# if constraint in awaken:
-# awaken.remove(constraint)
-## # penalize the CSpace
-## self.penalty += constraint.weight
-# # and remember that it failed
-# self.failed.append(constraint)
-
- # Check whether any of the changed vars cannot possibly be
determined; if so,
- # the constraint fails
-# if state != Constraint.failed:
- for var in changed_vars:
- try:
- var.determined(dstore=self.dstore, verbosity=verbosity)
- except VarError:
- if verbosity:
- print("{} CAN'T BE DETERMINED, SO {} MUST
FAIL".format(var, constraint))
- return Constraint.failed
-# state = Constraint.failed
-# break
-
-# if self.penalty > self.max_penalty:
-# # CSpace fails without running other constraints
-# if verbosity:
-# print('PENALTY {} EXCEEDS MAXIMUM
{}!'.format(self.penalty, self.max_penalty))
-# self.status = CSpace.failed
-# return CSpace.failed
-
- # If the constraint succeeds, add the constraints of its
variables to awaken
-# if state not in [Constraint.failed]:
-# print(" Awakening, # changed vars
{}".format(len(changed_vars)))
- for var in changed_vars:
- # Add constraints for changed var to awaken unless those
constraints are already entailed
- # or failed
- update_cons = {c for c in var.constraints if c not in
self.entailed and c not in self.failed}
-# print(" Awakening {} constraints for
{}".format(len(update_cons), var))
- if var == tracevar and verbosity:
- print('Adding {} constraints for changed variable
{}'.format(len(update_cons), tracevar))
- awaken.update(update_cons)
-# print('update cons {}'.format(update_cons))
- if verbosity > 1:
- print('# changed vars {}'.format(len(all_changed)))
- return awaken
-
=======================================
--- /l3lite/entry.py Fri May 2 20:10:49 2014 UTC
+++ /dev/null
@@ -1,776 +0,0 @@
-#
-# L3Lite entries: words, grammatical morphemes, lexemes, lexical classes
-#
-########################################################################
-#
-# This file is part of the HLTDI L^3 project
-# for parsing, generation, translation, and computer-assisted
-# human translation.
-#
-# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
-#
-# This program is free software: you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation, either version 3 of
-# the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# =========================================================================
-
-# 2014.02.10
-# -- Created
-# Possible subclasses: Lex (word, lexeme, class), Gram
-# 2014.02.12
-# -- Inheritance (class to word/lexeme): completed except for government.
-# (But possible conflicts in order are not handled yet.)
-# 2014.02.15
-# -- Methods for making dicts from entries and entries from dict, used
-# in serialization.
-# 2014.02.16-18
-# -- Class for groups (multi-word expressions).
-# 2014.02.18
-# -- Cloning of Lex instances (for groups and L3 nodes).
-# 2014.03.18
-# -- Lots of changes and additions to groups.
-# 2014.03.24
-# -- words attribute in Group is a list of [word, feat_dict] pairs.
-# 2014.04.16
-# -- Created simpler Group (with no dependency types), renamed old Group
to MWE.
-# 2014.04.20
-# -- Matching of group and sentence nodes.
-# 2014.04.30
-# -- Eliminated everything but Groups.
-# 2014.05.01
-# -- Group/node matching fixed.
-
-import copy, itertools
-import yaml
-
-from .features import *
-
-LEXEME_PRE = '%'
-CLASS_PRE = '$'
-
-class Entry:
- """Superclass for Group and possibly other lexical classes."""
-
- ID = 1
- dflt_dep = 'dflt'
-
- def __init__(self, name, language, id=0, trans=None):
- """Initialize name and basic features: language, trans, count,
id."""
- self.name = name
- self.language = language
- self.trans = trans
- self.count = 1
- if id:
- self.id = id
- else:
- self.id = Entry.ID
- Entry.ID += 1
-
- def __repr__(self):
- """Print name."""
- return '<{}:{}>'.format(self.name, self.id)
-
- @staticmethod
- def is_cat(name):
- """Is this the name of a category?"""
- return name[0] == CLASS_PRE
-
- ## Serialization
-
- def to_dict(self):
- """Convert the entry to a dictionary to be serialized in a yaml
file."""
- d = {}
- d['name'] = self.name
-# d['language'] = self.language
- d['count'] = self.count
- if self.trans:
- d['trans'] = self.trans
- d['id'] = self.id
- return d
-
- @staticmethod
- def from_dict(d, language):
- """Convert a dict (loaded from a yaml file) to an Entry object."""
- e = Entry(d.get('name'), language)
- e.count = d.get('count', 1)
- e.id = d.get('id', 1)
- e.trans = d.get('trans')
- return e
-
- def update_count(self, count=1):
- """Update count on the basis of data from somewhere."""
- self.count += count
-
- ### Translations (word, gram, lexeme, group entries)
- ###
- ### Translations are stored in a language-id-keyed dict.
- ### Values are dicts with target entry names as ids.
- ### Values are dicts with correspondence ('cor'), count ('cnt'), etc.
- ### as keys.
-
- def get_translations(self, language, create=True):
- """Get the translation dict for language in word/lexeme/gram entry.
- Create it if it doesn't exist and create is True."""
- if self.trans is None:
- self.trans = {}
- if language not in self.trans and create:
- self.trans[language] = {}
- return self.trans.get(language)
-
- def add_trans(self, language, trans, count=1):
- """Add translation to the translation dictionary for language,
- initializing its count."""
- transdict = self.get_translations(language, create=True)
- transdict[trans] = {'c': count}
-
- def update_trans(self, language, trans, count=1):
- """Update the count of translation."""
- transdict = self.get_translations(language)
- if trans not in transdict:
- s = "Attempting to update non-existent translation {} for {}"
- raise(EntryError(s.format(trans, self.name)))
- transdict[trans]['c'] += count
-
-##class Lex(Entry):
-##
-## cloneID = 1
-##
-## def __init__(self, name, language, cls=None, id=0, group=False):
-## """In addition to Entry features, initialize
-## depsin, depsout, order, agr, gov, grams, and (for word and
lexeme) class."""
-## Entry.__init__(self, name, language, id=id)
-## self.depsin = None
-## self.depsout = None
-## self.order = None
-## self.agr = None
-## self.gov = None
-## self.grams = None
-## self.cls = cls
-## self.cloneID = 0
-## # Whether entry is part of a group
-## self.group = group
-##
-## def __repr__(self):
-## """Print name and a unique identifier."""
-## return '<{}:{}{}>'.format(self.name, self.id, ';' +
str(self.cloneID) if self.cloneID else '')
-##
-## ## Cloning
-## ## Needed for groups, which consist of copies of lexes and
-## ## for L3 node entries
-##
-## def clone(self, group=True):
-## copied = Lex(self.name, self.language, cls=self.cls, id=self.id,
group=group)
-## copied.depsin = self.depsin
-## copied.depsout = self.depsout
-## copied.order = self.order
-## copied.agr = self.agr
-## copied.gov = self.gov
-## copied.grams = self.grams
-## copied.cloneID = Lex.cloneID
-## Lex.cloneID += 1
-## return copied
-##
-## ## Serialization
-##
-## def to_dict(self):
-## """Convert the lex to a dictionary to be serialized in a yaml
file."""
-## d = Entry.to_dict(self)
-## if self.depsin:
-## d['depsin'] = copy.deepcopy(self.depsin)
-## if self.depsout:
-## d['depsout'] = copy.deepcopy(self.depsout)
-## if self.order:
-## d['order'] = copy.deepcopy(self.order)
-## if self.agr:
-## d['agr'] = copy.deepcopy(self.agr)
-## if self.gov:
-## d['gov'] = copy.deepcopy(self.gov)
-## if self.grams:
-## d['grams'] = self.grams.copy()
-## if self.cls:
-## d['cls'] = self.cls
-## return d
-##
-## @staticmethod
-## def from_dict(d, language):
-## """Convert a dict (loaded from a yaml file) to a Lex object."""
-## l = Lex(d.get('name'), language, cls=d.get('cls'))
-## if d.get('depsin'):
-## l.depsin = d.get('depsin')
-## if d.get('depsout'):
-## l.depsout = d.get('depsout')
-## if d.get('order'):
-## l.order = d.get('order')
-## if d.get('agr'):
-## l.agr = d.get('agr')
-## if d.get('gov'):
-## l.gov = d.get('gov')
-## if d.get('grams'):
-## l.grams = d.get('grams')
-## return l
-##
-## ## Dependencies (word, lexeme, class entries)
-##
-## def get_depin(self, label, create=False):
-## """Get the dict of features of incoming dependencies with label,
creating
-## the dict if it's not there and create is True."""
-## if self.depsin is None:
-## self.depsin = {}
-## if create and label not in self.depsin:
-## self.depsin[label] = {}
-## self.language.record_label(label)
-## return self.depsin.get(label)
-##
-## def add_depin(self, label, feats):
-## """Assign feats (a dictionary) to features for incoming
dependencies with label,
-## or update the current features."""
-## d = self.get_depin(label, create=True)
-## d.update(feats)
-##
-## def get_depout(self, label, create=False):
-## """Get the dict of features of outgoing dependencies with label,
creating
-## the dict if it's not there and create is True."""
-## if self.depsout is None:
-## self.depsout = {}
-## if create and label not in self.depsout:
-## self.depsout[label] = {}
-## self.language.record_label(label)
-## return self.depsout.get(label)
-##
-## def add_depout(self, label, feats):
-## """Assign feats (a dictionary) to features for outgoing
dependencies with label,
-## or update the current features."""
-## d = self.get_depout(label, create=True)
-## d.update(feats)
-##
-## ## Dependency features
-## ## A dict with keys
-## ## 'min', 'max', 'dflt', 'maxdist'
-##
-## def set_deps_feat(self, featdict, key, value):
-## featdict[key] = value
-##
-## def get_deps_feat(self, featdict, key):
-## return featdict.get(key)
-##
-## ## Order constraints
-## ## A constraint is a tuple of dependency labels and '^' representing
the head
-##
-## def get_order(self, create=False):
-## """Get the set of order constraint tuples, creating the set if
it's not there
-## and create is True."""
-## if self.order is None and create:
-## self.order = []
-## return self.order
-##
-## def add_order(self, constraint):
-## """Add an order constraint tuple to the set of order
constraints."""
-## order_constraints = self.get_order(create=True)
-## order_constraints.append(constraint)
-## self.language.record_order(constraint)
-##
-## ## Grammatical features associated with words, classes, and lexemes
-##
-## def get_gram(self, feature, create=False):
-## """Get the possible values and their counts for grammatical
feature.
-## If this is a word, the value is a string; if a class or lexeme,
a dict
-## of values and counts."""
-## if self.grams is None:
-## self.grams = {}
-### if feature not in self.grams and create:
-### self.grams[feature] = {}
-## return self.grams.get(feature)
-##
-## def set_gram(self, feat, values):
-## """Set possible values and their counts for grammatical feature.
-## values is a dict of values and their counts."""
-## if self.grams is None:
-## self.grams = {}
-## if feat in self.grams:
-## s = "Entry for {} already has a constraint for feature {}"
-## raise(EntryError(s.format(self.name, feat)))
-## self.grams[feat] = values
-##
-## def update_gram_value(self, feat, value, count=1):
-## """Add count to the current count for feature value."""
-## gram = self.get_gram(feat, create=True)
-## if value in gram:
-## gram[value] += count
-## else:
-## gram[value] = count
-##
-## ## Agreement and government
-##
-## ## An agreement constraint requires a dependency label, a head
feature, and
-## ## and a dependent feature.
-##
-## def add_agr(self, deplabel, head_feat, dep_feat):
-## """Add an agreement constraint to the list of constraints in the
entry."""
-## if self.agr is None:
-## self.agr = []
-## self.agr.append([deplabel, head_feat, dep_feat])
-##
-## ## A government constraint requires a dependency label, a dependent
feature,
-## ## and a dependent value.
-##
-## def add_gov(self, deplabel, dep_feat, dep_value):
-## if self.gov is None:
-## self.gov = []
-## self.gov.append([deplabel, dep_feat, dep_value])
-##
-## ## Inheritance: copying features from classes to lexemes and words
-## ## at initialization
-##
-## def inherit(self):
-## if not self.cls:
-## return
-## cls = self.language.get_class(self.cls)
-## if not cls:
-## s = "Class {} for {} does not exist"
-## raise(EntryError(s.format(self.cls, self)))
-## self.inherit_deps(cls)
-## self.inherit_order(cls)
-## self.inherit_grams(cls)
-## self.inherit_agr(cls)
-## self.inherit_gov(cls)
-## # Also inherit translation?
-##
-## def inherit_deps(self, cls):
-## """Inherit dependency constraints (in and out) from class."""
-## # In
-## cls_depsin = cls.depsin
-## if cls_depsin:
-## if self.depsin is None:
-## self.depsin = {}
-## for label, cls_constraints in cls_depsin.items():
-## if label in self.depsin:
-## constraints = self.depsin[label]
-## for k, v in cls_constraints.items():
-## if k in constraints:
-## continue
-## constraints[k] = v
-## else:
-## # Should this be a copy of cls_constraints?
-## self.depsin[label] = cls_constraints
-## # Out
-## cls_depsout = cls.depsout
-## if cls_depsout:
-## if self.depsout is None:
-## self.depsout = {}
-## for label, cls_constraints in cls_depsout.items():
-## if label in self.depsout:
-## constraints = self.depsout[label]
-## for k, v in cls_constraints.items():
-## if k in constraints:
-## continue
-## constraints[k] = v
-## else:
-## # Should this be a copy of cls_constraints?
-## self.depsout[label] = cls_constraints
-##
-## def inherit_order(self, cls):
-## """Inherit order constraints from class."""
-## cls_order = cls.order
-## if cls_order:
-## my_order = self.get_order(create=True)
-## # Just add all constraints (tuples) from the class to ones
-## # already there in the word or lexeme; what if there are
-## # conflicts?? (sort these out later)
-## for co in cls_order:
-## if co not in my_order:
-## my_order.append(co)
-##
-## def inherit_grams(self, cls):
-## """Inherit grammatical features from class."""
-## cls_grams = cls.grams
-## if cls_grams:
-## if self.grams is None:
-## self.grams = {}
-## for feature, value in cls_grams.items():
-## if feature in self.grams:
-## # word/lexeme gram has priority over class, so
-## # ignore this
-## continue
-## # copy any other feature/value constraint
-## # (should the value be a copy??)
-## self.grams[features] = value
-##
-## def inherit_agr(self, cls):
-## """Inherit agreement constraints from class."""
-## cls_agr = cls.agr
-## if cls_agr:
-## if self.agr is None:
-## self.agr = []
-## for constraint in cls_agr:
-## if constraint not in self.agr:
-## self.agr.append(constraint)
-##
-## def inherit_gov(self, cls):
-## """Inherit government constraints from class."""
-##
-##class MWE(Entry):
-## """Multi-word expressions. Each group consists of a head and a set
of nodes,
-## possibly connected to other nodes through explicit dependencies and
an explicit
-## order of the nodes.
-## Variable slots have dedicated names that allow them to be
-## referenced in translations.
-## MWEs must be created *after* other lexical items.
-## {index: [word_obj, {dep/position_feats}...}
-## """
-##
-## def __init__(self, name, language, head, head_feats=None,
head_order=None, head_lexeme=False):
-## """name of a MWE is something like acabar_de_V.
-## head is the word that is the syntactic head of the group."""
-## Entry.__init__(self, name, language)
-## # A list of [word feats] pairs; index in the list is the word's
(node's) ID
-## self.words = []
-## self.word_id = 0
-## if head_lexeme:
-## self.head_lexeme = True
-## head_type = language.get_lexeme(head)
-## else:
-## self.head_lexeme = False
-## head_type = language.get_words(head)
-## if not head_type:
-### print("No existing lexical entry in {} for head of group
{}".format(language, name))
-## # SHOULD THIS BE RECORDED IN THE WORD LEXICON?
-## self.head = language.add_word(head, group=True)
-## else:
-## # Use the first one if there's more than one
-## self.head = head_type[0].clone()
-## self.words.append([self.head, {}])
-### self.words[self.word_id] = [self.head, {}]
-## if head_order is not None:
-## self.words[word_id][1]['o'] = head_order
-### self.words[self.word_id][1]['o'] = head_order
-## self.word_id += 1
-##
-## def __repr__(self):
-## """Print name."""
-## return '<{}:{}>'.format(self.name, self.id)
-##
-## # Serialization
-##
-## def to_dict(self):
-## """Convert the group to a dictionary to be serialized in a yaml
file."""
-## d = Entry.to_dict(self)
-## d['head_lexeme'] = self.head_lexeme
-### d['words'] = {}
-## d['words'] = []
-## w = d['words']
-### for index, lex in self.words.items():
-## for lex in enumerate(self.words):
-## l = lex[0]
-## name = l.name
-## w.append([name])
-### w[index] = [name]
-## if len(lex) == 2:
-## w[-1].append(copy.deepcopy(lex[1]))
-### w[index].append(copy.deepcopy(lex[1]))
-## return d
-##
-## @staticmethod
-## def from_dict(d, language):
-## """Convert a dict (loaded from a yaml file) to a MWE object."""
-## lexeme = d['head_lexeme']
-## m = MWE(d.get('name'), language, d.get('words')[0][0],
head_lexeme=lexeme)
-### for id, word in d.get('words').items():
-## for id, word in enumerate(d.get('words')):
-## if id == 0:
-## # Just handle the dependencies for this case
-## deps = word[1]
-## m.words[0][1] = copy.deepcopy(deps)
-## else:
-## name = word[0]
-## lex = language.get_words(name)[0]
-## if len(word) == 2:
-## deps = word[1]
-## lex_info = [lex.clone(), copy.deepcopy(deps)]
-## else:
-## lex_info = [lex.clone()]
-## m.words.append(lex_info)
-## return m
-##
-## ## Getters
-##
-## def get_word(self, index):
-## """The lex and features for a word in the group with ID index."""
-## if index > len(self.words) - 1:
-## s = "No word in {} with internal ID {}"
-## raise(EntryError(s.format(self, index)))
-## return self.words[index]
-##
-## def get_word_feats(self, index):
-## word = self.get_word(index)
-## return word[1]
-##
-## def get_lex(self, id):
-## """Return the Lex with the given index."""
-## word = self.get_word(id)
-## return word[0]
-##
-## def get_daughters(self, word_id, dep=None):
-## """Return the indices of the daughters of word with id word_id
-## of type dep or all daughters if dep is None."""
-## feats = self.get_word_feats(word_id)
-## if 'd' not in feats:
-## return
-## daughters = feats['d']
-## if dep is not None:
-## return daughters.get(dep)
-## else:
-## # Maybe leave as an iterable object?
-## return
list(itertools.chain.from_iterable(daughters.values()))
-##
-## def get_mother(self, word_id):
-## """Return the type and index of the internal mother of word with
id word_id.
-## If this is the head, return None."""
-## feats = self.get_word_feats(word_id)
-## if 'm' not in feats:
-## return
-## return feats['m']
-##
-## def add_word(self, word, head_id=None, dependency=Entry.dflt_dep,
order=None):
-## """Add a word to the group, as dependent on dependency from
head."""
-## # For now, use first word entry
-## typ = self.language.get_words(word)
-## if not typ:
-### print("No existing lexical entry in {} for head of group
{}".format(self.language, word))
-## # SHOULD THIS BE RECORDED IN THE WORD LEXICON?
-## word = self.language.add_word(word, group=True)
-## else:
-## # Pick the first lexical entry for now
-## word = typ[0].clone()
-## self.words.append([word, {}])
-### self.words[self.word_id] = [word, {}]
-## if head_id is not None:
-## self.add_dep(head_id, self.word_id, dep=dependency)
-## if order is not None:
-## self.words[self.word_id][1]['o'] = order
-## id = self.word_id
-## self.word_id += 1
-## return id
-##
-## def add_dep(self, src, dest, dep=Entry.dflt_dep):
-## """Make a dependency of type dep from word with id src to word
with id dest."""
-## if src >= len(self.words):
-## s = "No word in {} with internal ID {}"
-## raise(EntryError(s.format(self, src)))
-## if dest >= len(self.words):
-## s = "No word in {} with internal ID {}"
-## raise(EntryError(s.format(self, dest)))
-## daughter_dict = self.get_word_feats(dest)
-## if 'm' in daughter_dict:
-## s = "Word {} in {} already has a mother"
-## raise(EntryError(s.format(dest, self)))
-## daughter_dict['m'] = (dep, src)
-## mother_dict = self.get_word_feats(src)
-## if 'd' not in mother_dict:
-## mother_dict['d'] = {}
-## mother_daughters = mother_dict['d']
-## if dep not in mother_daughters:
-## mother_daughters[dep] = []
-## mother_daughters[dep].append(dest)
-##
-## ## Translations
-## ## A translation of a group is a group in another language, with a
mapping or alignment
-## ## between the nodes (words) in the two groups.
-## ## The mapping takes the form of a list of target word indices or
None if the corresponding
-## word is unspecified or -1 if there is no corresponding word (deletion). If there are
(deletion). If there are
-## ## more words/nodes in the target than in the source group, the
length of the list of
-## ## is the number of target nodes.
-##
-## def add_trans(self, language, trans, count=1):
-## """Add translation to the translation dictionary for language,
-## initializing its count."""
-## Entry.add_trans(self, language, trans, count=count)
-## transdict = self.get_trans(language, trans)
-## transdict['m'] = [None for x in range(len(self.words))]
-##
-## def get_trans(self, language, trans, create=True):
-## alltrans = self.get_translations(language, create=create)
-## if not alltrans or trans not in alltrans:
-## s = "Attempting to update non-existent translation {} for {}"
-## raise(EntryError(s.format(trans, self.name)))
-## return alltrans[trans]
-##
-## def get_trans_map(self, language, trans):
-## """Get the mapping to nodes in translation."""
-## tdict = self.get_trans(language, trans)
-## return tdict.get('m')
-##
-## def get_trans_map1(self, language, trans, src_index):
-## """Get the mapped index of src_index in translation trans."""
-## map = self.get_trans_map(language, trans)
-## if not map:
-## s = "Attempting to access non-existing mapping for
translation {} of {}"
-## raise(EntryError(s.format(trans, self)))
-## return map[src_index]
-##
-## def add_trans_map(self, language, trans, src_id, trg_id):
-## """Add a correspondence between source and target nodes in a
translation mapping."""
-## tdict = self.get_trans(language, trans)
-### if 'm' not in tdict:
-### tdict['m'] = []
-### tdict['m'].append((src_id, trg_id))
-## tdict['m'][src_id] = trg_id
-##
-## def add_trans_del(self, language, trans, src_id):
-## """Record a node in the source group with nothing corresponding
to it in the target group."""
-## tdict = self.get_trans(language, trans)
-### if 'm' not in tdict:
-### tdict['m'] = []
-### tdict['m'].append((src_id, -1))
-## tdict['m'][src_id] = -1
-##
-## def add_trans_ins(self, language, trans, trg_id):
-## """Record a node in the target group with nothing corresponding
to it in the source group."""
-## tdict = self.get_trans(language, trans)
-### if 'm' not in tdict:
-### tdict['m'] = []
-## tdict['m'].append(trg_id)
-### tdict['m'].append((-1, trg_id))
-
-class Group(Entry):
- """Primitive multi-word expressions. Default is a head with unlabeled
dependencies
- to all other tokens and translations, including alignments, to one or
more
- other languages."""
-
- def __init__(self, tokens, head_index=-1, head='', language=None,
name='',
- features=None, trans=None):
- """Either head_index or head (a string) must be specified."""
- # tokens is a list of strings
- # name may be specified explicitly or not
- name = name or Group.make_name(tokens)
- Entry.__init__(self, name, language, trans=trans)
- self.tokens = tokens
- if head:
- self.head = head
- self.head_index = tokens.index(head)
- else:
- self.head = tokens[head_index]
- self.head_index = head_index
- # Either None or a list of feat-val dicts for tokens that require
them
- # Convert dicts to Features objects
- if isinstance(features, list):
- features = [Features(d) if d else None for d in features]
- self.features = features
-
- def __repr__(self):
- """Print name."""
- return '{}:{}'.format(self.name, self.id)
-
- @staticmethod
- def make_name(tokens):
- # Each token is either a string or a (string, feat_dict) pair
-# strings = []
-# for token in tokens:
-# if isinstance(token, str):
-# strings.append(token)
-# else:
-# form, feat_dict = token
-# fv = ['{}={}'.format(f, v) for f, v in feat_dict.items()]
-# fv = ','.join(fv)
-# strings.append("{}:{}".format(form, fv))
- return '_'.join(tokens)
-
- # Serialization
-
- def to_dict(self):
- """Convert the group to a dictionary to be serialized in a yaml
file."""
- d = Entry.to_dict(self)
- d['words'] = self.tokens
- d['features'] = self.features
- return d
-
- @staticmethod
- def from_dict(d, language, head):
- """Convert a dict (loaded from a yaml file) to a Group object."""
- tokens = d['words']
- features = d.get('features')
- name = d.get('name', '')
- trans = d.get('trans')
- p = Group(tokens, head=head, language=language, features=features,
- name=name, trans=trans)
- return p
-
- def match_nodes(self, snodes, head_sindex, verbosity=0):
- """Attempt to match the group tokens (and features) with snodes
from a sentence,
- returning the snode indices and root and unified features if
any."""
-# print("Does {} match {}".format(self, snodes))
- match_snodes = []
- for index, token in enumerate(self.tokens):
- match_snodes1 = []
- feats = self.features[index] if self.features else None
- if verbosity:
- print(" Attempting to match {}".format(token))
- matched = False
- for node in snodes:
- if verbosity:
- print(" Trying {}, token index {}, snode index {}
head index {}".format(node, index, node.index, head_sindex))
- if index == self.head_index:
- # This token is the head of the group
- if node.index == head_sindex:
- # This was already matched in lexicalization
-# if index == node.index == head_sindex:
- # This is the token corresponding to the group head
- node_match = node.match(token, feats)
- if node_match == False:
- # This has to match, so fail now
- return False
- else:
- match_snodes1.append((node.index, node_match))
- if verbosity:
- print(" Head matched
already".format(node))
- matched = True
- # Don't look further for an snode to match
this token
- break
- else:
- node_match = node.match(token, feats)
- if verbosity:
- print(' Node {} match {}:{}, {}::
{}'.format(node, token, index, feats, node_match))
- if node_match != False:
- match_snodes1.append((node.index, node_match))
- if verbosity:
- print(" Matched node {}".format(node))
- matched = True
- if not matched:
- if verbosity:
- print(" {} not matched; failed".format(token))
- return False
- else:
- match_snodes.append(match_snodes1)
-# print("Group {}, s_indices {}".format(self, match_snodes))
- return match_snodes
-
- ### Translations
-
- ## Alignments: position correspondences, agreement constraints
- ## አድርጎ አያውቅም -> godhe hin beeku
- ## a: {positions: (1, 2),
- ## agreements: {gen: gen},
- ## featmaps: {((pers, 2), (num, 2)): ((pers, 3), (num, 2))}
- ## }
-
- def add_alignment(self, trans):
- pass
-
-class EntryError(Exception):
- '''Class for errors encountered when attempting to update an entry.'''
-
- def __init__(self, value):
- self.value = value
-
- def __str__(self):
- return repr(self.value)
-
=======================================
--- /l3lite/features.py Sun Apr 27 18:07:59 2014 UTC
+++ /dev/null
@@ -1,140 +0,0 @@
-#
-# L3Lite features (dicts).
-#
-########################################################################
-#
-# This file is part of the HLTDI L^3 project
-# for parsing, generation, translation, and computer-assisted
-# human translation.
-#
-# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
-#
-# This program is free software: you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation, either version 3 of
-# the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# =========================================================================
-
-# 2014.04.19
-# -- Created.
-# 2014.04.23-24
-# -- Unification with one or both values sets.
-# Unification with a list/tuple of feature-value pairs.
-# Copying of agreement features: agree:().
-
-class Features(dict):
-
- def __init__(self, dct):
- dict.__init__(self, dct)
-
- def __repr__(self):
- l = []
- for f, v in self.items():
- l.append("{}={}".format(f, v))
- return "{{{0}}}".format(','.join(l))
-
- def to_list(self):
- """Convert features dict to a sorted list."""
- l = list(self.items())
- l.sort()
- return l
-
- @staticmethod
- def unify_sets(x, y):
- """If both are sets, their intersection. If one is a set,
- the other if it's a member of the set."""
- if isinstance(x, set):
- if isinstance(y, set):
- return x & y
- elif y in x:
- return y
- elif isinstance(y, set):
- if x in y:
- return x
- return False
-
- @staticmethod
- def simple_unify(x, y):
- """Unify the values x and y, returning the result or 'fail'."""
- # If they're the same, return one.
- if x == y:
- return x
- # If one or the other is a set, return the intersection
- # (a single value if one is not a set)
- elif isinstance(x, set) or isinstance(y, set):
- u = Features.unify_sets(x, y)
- if u is not False:
- return u
- else:
- return 'fail'
-# # If both are dicts, call unify_dict
-# elif isinstance(x, dict) and isinstance(y, dict):
-# x.unify(y)
- # Otherwise fail
- else:
- return 'fail'
-
- def unify(self, other):
- """other is a Features object or a dict. Attempt to unify self
with other,
- returning the result or 'fail'."""
- result = Features({})
- for k in set(self.keys()) | set(other.keys()):
- # Check all of the keys of self and other
- self_val, other_val = self.get(k, 'nil'), other.get(k, 'nil')
- if self_val != 'nil':
- if other_val != 'nil':
- # If x and y both have a value for k, try to unify the
values
- u = Features.simple_unify(self_val, other_val)
- if u == 'fail':
- return 'fail'
- else:
- result[k] = u
- else:
- # If self has a value for k but other doesn't, use
self's value
- result[k] = self_val
- elif other_val != 'nil':
- # If other has a value for k but self doesn't, use other's
value
- result[k] = other_val
-
- return result
-
- def agree(self, target, agrs):
- """Make target agree with self on features specified in agrs
dict."""
- for src_feat, targ_feat in agrs.items():
- if src_feat in self:
- src_value = self[src_feat]
- if targ_feat in target and target[targ_feat] != src_value:
- # Clash; fail!
- return 'fail'
- else:
- target[targ_feat] = src_value
-
- def match_list(self, feat_list):
- """Does this Features object match list or tuple of feature/value
pairs?"""
- for feat, val in feat_list:
- if feat in self:
- if Features.simple_unify(val, self[feat]) == 'fail':
- return False
- return True
-
- @staticmethod
- def unify_all(features_list):
- """Unify all of the Features objects (or None) in the list, if
possible."""
- result = Features({})
- for features in features_list:
- if not features:
- continue
- result = result.unify(features)
- if result == 'fail':
- return 'fail'
- return result
-
=======================================
--- /l3lite/language.py Fri May 2 22:53:47 2014 UTC
+++ /dev/null
@@ -1,449 +0,0 @@
-#
-# L3Lite languages: dictionaries of lexical/grammatical entries
-#
-########################################################################
-#
-# This file is part of the HLTDI L^3 project
-# for parsing, generation, translation, and computer-assisted
-# human translation.
-#
-# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
-#
-# This program is free software: you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation, either version 3 of
-# the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# =========================================================================
-
-# 2014.02.09
-# -- Created
-# 2014.02.10
-# -- Made entries a separate class.
-# 2014.02.15
-# -- Methods for serializing and deserializing languages (using YAML).
-# 2014.03.24
-# -- Words, lexemes, and classes are all in the same dictionary
(self.words).
-# Lexemes start with %, classes with $.
-# 2014.04.17
-# -- Analysis and generation dicts for particular wordforms.
-# 2014.04.30
-# -- Eliminated entry types in lexicon other than Groups and forms.
-
-from .entry import *
-
-import os, yaml
-
-LANGUAGE_DIR = os.path.join(os.path.dirname(__file__), 'languages')
-
-class Language:
- """Dictionaries of words, lexemes, grammatical features, and
- lexical classes."""
-
- languages = []
-
- def __init__(self,
- name, abbrev,
- words=None, lexemes=None, grams=None, classes=None,
- mwes=None, groups=None, forms=None,
- genforms=None):
- """Initialize dictionaries and names."""
- self.name = name
- self.abbrev = abbrev
- # Words, lexemes, classes
- self.words = words or {}
- # Combine with words in a single lexicon?
-# self.lexemes = lexemes or {}
-# self.grams = grams or {}
-# self.classes = classes or {}
-# self.mwes = mwes or {}
- self.forms = forms or {}
- self.groups = groups or {}
- # Dict of groups with names as keys
- self.groupnames = {}
-# # Record possibilities for dependency labels, feature values,
order constraints
-# self.possible = {}
- # Record whether language has changed since last loaded
- self.changed = False
- # Dictionary of morphologically generated words:
- # {lexeme: {(feat, val): {(feat, val): wordform,...}, ...}, ...}
- self.genforms = genforms or {}
- Language.languages.append(abbrev)
-
- def __repr__(self):
- """Print name."""
- return '<<{}>>'.format(self.name)
-
- def to_dict(self):
- """Convert the language to a dictionary to be serialized as a yaml
file."""
- d = {}
- d['name'] = self.name
- d['abbrev'] = self.abbrev
-# d['possible'] = self.possible
- # Entries: each is a dict, whose values must be converted to dicts
-# if self.grams:
-# grams = {}
-# for k, v in self.grams.items():
-# grams[k] = v.to_dict()
-# d['grams'] = grams
-# if self.classes:
-# classes = {}
-# for k, v in self.classes.items():
-# classes[k] = v.to_dict()
-# d['classes'] = classes
- # Lexemes and words should probably be separate dictionaries (and
files).
-# if self.lexemes:
-# lexemes = {}
-# for k, v in self.lexemes.items():
-# lexemes[k] = v.to_dict()
-# d['lexemes'] = lexemes
-# if self.words:
-# words = {}
-# for k, v in self.words.items():
-# # Words are lists
-# words[k] = [lex.to_dict() for lex in v]
-# d['words'] = words
-# if self.mwes:
-# mwes = {}
-# for k, v in self.mwes.items():
-# mwes[k] = [m.to_dict() for m in v]
-# d['mwes'] = mwes
- if self.groups:
- groups = {}
- for head, v in self.groups.items():
- groups[head] = [g.to_dict() for g in v]
- d['groups'] = groups
- if self.forms:
- forms = {}
- for k, v in self.forms.items():
- # v is an fv dict or a list of fv dicts
- forms[k] = v
- return d
-
- def write(self, directory, filename=''):
- """Serialize the language."""
- filename = filename or self.abbrev + '.lg'
- path = os.path.join(directory, filename)
- with open(path, 'w', encoding='utf8') as file:
- yaml.dump(self.to_dict(), file)
-
- @staticmethod
- def from_dict(d, reverse=True):
- """Convert a dict (loaded from a yaml file) to a Language
object."""
- l = Language(d.get('name'), d.get('abbrev'))
- l.possible = d.get('possible')
-# grams = d.get('grams')
-# if grams:
-# l.grams = {}
-# for k, v in grams.items():
-# l.grams[k] = Entry.from_dict(v, l)
-# classes = d.get('classes')
-# if classes:
-# l.classes = {}
-# for k, v in classes.items():
-# l.classes[k] = Lex.from_dict(v, l)
-# lexemes = d.get('lexemes')
-# if lexemes:
-# l.lexemes = {}
-# for k, v in lexemes.items():
-# l.lexemes[k] = Lex.from_dict(v, l)
-# words = d.get('words')
-# if words:
-# l.words = {}
-# for k, v in words.items():
-# l.words[k] = [Lex.from_dict(lex, l) for lex in v]
-# mwes = d.get('mwes')
-# if mwes:
-# l.mwes = {}
-# for k, v in mwes.items():
-# l.mwes[k] = [MWE.from_dict(m, l) for m in v]
- groups = d.get('groups')
- if groups:
- l.groups = {}
- for head, v in groups.items():
- group_objs = [Group.from_dict(g, l, head) for g in v]
- l.groups[head] = group_objs
- # Add groups to groupnames dict
- for go in group_objs:
- l.groupnames[go.name] = go
- forms = d.get('forms')
- if forms:
- l.forms = {}
- for k, v in forms.items():
- # v should be a dict or a list of dicts
- # Convert features value to a Features object
- if isinstance(v, dict):
- if 'features' in v:
- v['features'] = Features(v['features'])
- else:
- for d in v:
- if 'features' in d:
- d['features'] = Features(d['features'])
- l.forms[k] = v
- if reverse:
- # Add item to genform dict
- if isinstance(v, dict):
- if 'seg' not in v:
- l.add_genform(k, v['root'], v.get('features'))
- else:
- for d in v:
- l.add_genform(k, d['root'], d.get('features'))
- return l
-
- @staticmethod
- def read(path):
- """Create a Language from the contents of a yaml file, a dict
- that must be then converted to a Language."""
- with open(path, encoding='utf8') as file:
- dct = yaml.load(file)
- return Language.from_dict(dct)
-
- @staticmethod
- def load(*abbrevs):
- languages = []
- for abbrev in abbrevs:
- path = os.path.join(LANGUAGE_DIR, abbrev + '.lg')
- try:
- language = Language.read(path)
- languages.append(language)
- print("Loading language {}".format(language))
- except IOError:
- print("That language doesn't seem to exist.")
- return
- return languages
-
- ### Basic setters. Create entries (dicts) for item. For debugging
purposes, include name
- ### in entry.
-
-## def add_word(self, word, cls=None, mwe=False):
-## entry = Lex(word, self, cls=cls, mwe=mwe)
-## if word in self.words:
-## self.words[word].append(entry)
-## else:
-## self.words[word] = [entry]
-## self.changed = True
-## return entry
-##
-## def add_lexeme(self, lexeme, cls=None):
-## if lexeme in self.words:
-## s = "Lexeme {} already in dictionary"
-## raise(LanguageError(s.format(lexeme)))
-## entry = Lex(lexeme, self, cls=cls)
-## # Maybe not a list since there's always only one
-## self.words[lexeme] = [entry]
-## self.changed = True
-## return entry
-
- def add_form(self, form, dct, reverse=True):
- """Form dict has root, features, cats.
-        If reverse is True, also add the form to the genforms dict."""
- if form not in self.forms:
- self.forms[form] = dct
- else:
- entry = self.forms[form]
- if isinstance(entry, dict):
- # Make this the second entry
- self.forms[form] = [entry, dct]
- else:
- # There are already two or more entries in a list
- entry.append(dct)
- if reverse:
- lexeme = dct['root']
- features = dct['features']
- self.add_genform(form, lexeme, features)
-
- def add_genform(self, form, lexeme, features):
- """Add the form to a lexeme- and feature-keyed dict."""
- if lexeme not in self.genforms:
- self.genforms[lexeme] = {}
- featdict = self.genforms[lexeme]
- # features is a Features object; convert it to a list of tuples
- features = tuple(features.to_list())
- featdict[features] = form
-# feat = features.pop(0)
-# self.make_featdict(featdict, feat, features, form)
-
-# @staticmethod
-# def make_featdict(featdict, feat, features, form):
-# """Make a feat-value dict with the form as final value."""
-# if not features:
-# featdict[feat] = form
-# return
-# if feat not in featdict:
-# featdict[feat] = {}
-# new_feat = features.pop(0)
-# Language.make_featdict(featdict[feat], new_feat, features, form)
-
-## def add_class(self, cls):
-## if cls in self.words:
-## s = "Class {} already in dictionary"
-## raise(LanguageError(s.format(cls)))
-## entry = Lex(cls, self)
-## # Maybe not a list since there's always only one
-## self.words[cls] = [entry]
-## self.changed = True
-## return entry
-##
-## def add_mwe(self, name, head, head_order=None, head_lexeme=False):
-## entry = MWE(name, self, head, head_order=head_order,
head_lexeme=head_lexeme)
-## if head not in self.mwes:
-## self.mwes[head] = []
-## self.mwes[head].append(entry)
-## self.changed = True
-## return entry
-
- def add_group(self, tokens, head_index=-1, head='', name='',
features=None):
- group = Group(tokens, head_index=head_index, head=head,
- language=self, name=name, features=features)
-# print('Group {}, head {}'.format(group, group.head))
- if features:
- head_i = tokens.index(group.head)
- head_feats = features[head_i]
- else:
- head_feats = None
- self.add_group_to_lexicon(group.head, group, head_feats)
- self.groupnames[group.name] = group
- self.changed = True
- return group
-
- def add_group_to_lexicon(self, head, group, features):
- if not features:
- # Add the group to the list of groups for the head word/lexeme
- if head not in self.groups:
- self.groups[head] = {}
- if () not in self.groups[head]:
- self.groups[head][()] = []
- self.groups[head][()].append(group)
- else:
- # Convert fv dict to an alphabetized tuple of fv pairs
- fvs = list(features.items())
- fvs.sort()
- fvs = tuple(fvs)
- if head not in self.groups:
- self.groups[head] = {}
- if fvs not in self.groups[head]:
- self.groups[head][fvs] = []
- self.groups[head][fvs].append(group)
-
-## def add_gram(self, gram, feature, count=1):
-## """A gram, for example, 'plural', must have a feature, for
example,
-## 'number'."""
-## if gram in self.grams:
-## s = "Grammatical morpheme {} already in dictionary"
-## raise(LanguageError(s.format(gram)))
-## entry = Entry(gram, self)
-## self.grams[gram] = entry
-## entry.feature = feature
-## self.grams[gram] = entry
-## self.record_gram(gram, feature, count)
-## self.changed = True
-## return entry
-##
-## def record_gram(self, name, feature, count):
-## """Record the gram value and its count under its feature name."""
-## if 'features' not in self.possible:
-## self.possible['features'] = {}
-## if feature not in self.possible['features']:
-## self.possible['features'][feature] = {}
-## self.possible['features'][feature][name] = count
-##
-## def get_possible_feat_values(self, feature):
-## """Possible values and associated counts for grammatical
feature."""
-## if 'features' not in self.possible:
-## self.possible['features'] = {}
-## return self.possible['features'].get(feature)
-
- ### Basic getters.
-
-## def get_words(self, word):
-## """Returns a list of word entries."""
-## return self.words.get(word)
-##
-## def get_class(self, cls):
-## """Returns a single class entry."""
-## return self.words.get(cls)[0]
-##
-## def get_gram(self, gram):
-## """Returns a single gram feature value entry."""
-## return self.grams.get(gram)
-##
-## def get_lexeme(self, lexeme):
-## """Returns a single lexeme entry."""
-## return self.words.get(lexeme)[0]
-
- ### Generation of word forms
-
- def generate(self, root, features, verbosity=0):
- if verbosity:
- print("Generating {}:{}".format(root, features))
- if not features:
- # Just return the "root"
- return [root]
- if root not in self.genforms:
- print("Impossible to generate root {}".format(root))
- return
- gendict = self.genforms[root]
- # List of matching forms
- result = []
- for feat_list, form in gendict.items():
- if features.match_list(feat_list):
- result.append(form)
-# print('Feat list {}, form {}'.format())
- if not result:
- print("No forms found for {}:{}".format(root, features))
- return result
-
-## ## Dependencies (word, lexeme, class entries)
-##
-## def record_label(self, label):
-## """Record the dependency label in the set of possible labels."""
-## if 'deplabels' not in self.possible:
-## self.possible['deplabels'] = []
-## if label not in self.possible['deplabels']:
-## self.possible['deplabels'].append(label)
-##
-## def get_possible_labels(self):
-## return self.possible.get('deplabels')
-##
-## ## Order constraints
-## ## A constraint is a tuple of dependency labels and '^' representing
the head
-##
-## def record_order(self, constraint):
-## """Record the constraint tuple in the set of possible
constraints for the language."""
-## if 'order' not in self.possible:
-## self.possible['order'] = []
-## if constraint not in self.possible['order']:
-## # Append a *copy* of the constraint list
-## self.possible['order'].append(constraint[:])
-##
-## def get_possible_orders(self):
-## return self.possible.get('order')
-##
-## ## Agreement constraints
-##
-## def record_agr(self, constraint):
-## """An agreement constraint is a tuple consisting of
-## dep label, head feature, dependent feature."""
-## if 'agr' not in self.possible:
-## self.possible['agr'] = []
-## if constraint not in self.possible['agr']:
-## # Append a *copy* of the constraint list
-## self.possible['agr'].append(constraint[:])
-
-class LanguageError(Exception):
- '''Class for errors encountered when attempting to update the
language.'''
-
- def __init__(self, value):
- self.value = value
-
- def __str__(self):
- return repr(self.value)
-
=======================================
--- /l3lite/languages/amh.lg Sun Apr 27 18:07:59 2014 UTC
+++ /dev/null
@@ -1,59 +0,0 @@
-name: አማርኛ
-abbrev: amh
-groups:
- በላ:
- - words: [$food, በላ]
- features: [{case: acc}, False]
- trans:
- orm:
- - [$food_nyaate,
- {alg: [0, 1],
- agr: [false, {sp: prs, sn: num, sg: gen}]}]
- አወቀ:
- - words: [$vb, አወቀ]
- features: [{tam: ger}, {tam: imf, pol: neg}]
- name: ^አድርጎ^አያውቅም
- trans:
- orm:
- - [^godhe_hin^beeku,
- {alg: [0, 2],
- agr: [{sp: prs, sn: num, sg: gen},
- {sp: prs, sn: num, sg: gen}]}]
- - words: [$fact, አወቀ]
- trans:
- orm:
- - [$fact_beeke,
- {alg: [0, 1],
- agr: [False, {sp: prs, sn: num, sg: gen}]}]
- አሳ:
- - words: [አሳ]
- trans:
- orm:
- - [qurxummii]
- - [kalluuna]
-
-forms:
- ያውቃል:
- root: አወቀ
- features: {tam: imf, pol: aff, sp: 3, sn: 0, sg: 0}
- cats: [$vb]
- አያውቅም:
- root: አወቀ
- features: {tam: imf, pol: neg, sp: 3, sn: 0, sg: 0}
- cats: [$vb]
- በልቶ:
- root: በላ
- features: {tam: ger, sp: 3, sn: 0, sg: 0}
- cats: [$vb]
- በላ:
- root: በላ
- features: {tam: prf, sp: 3, sn: 0, sg: 0}
- cats: [$vb]
- ድንች:
- root: ድንች
- features: {num: 0, poss: 0, def: 0}
- cats: [$food, $thing]
- አሳ:
- root: አሳ
- features: {num: 0, poss: 0, def: 0}
- cats: [$food, $animal, $thing]
=======================================
--- /l3lite/languages/eng.lg Fri May 2 20:10:49 2014 UTC
+++ /dev/null
@@ -1,53 +0,0 @@
-name: English
-abbrev: eng
-forms:
- "it's":
- seg:
- - [it, {root: it, features: {num: 0, per: 3, gen: 2}, cats: [$pron]}]
- - [is, {root: be, features: {tns: prs, per: 3, num: 0}, cats: [$aux,
$cop]}]
- end:
- root: end
- features: {num: 0, prs: 3}
- cats: [$abs]
- boy:
- root: boy
- features: {num: 0, prs: 3}
- cats: [$sbd]
- act:
- root: act
- features: {num: 0, prs: 3}
- cats: [$sth]
- us:
- root: we
- features: {num: 1, prs: 1, case: 1}
- cats: [$sbd]
- them:
- root: they
- features: {num: 1, prs: 3, case: 1}
- song:
- root: song
- features: {num: 0, prs: 3}
- cats: [$singable]
- sang:
- root: sing
- features: {tns: prt}
- read:
- root: read
- features: {tns: prt}
-
-groups:
- end:
- - words: [the, end, of, the, world]
- read:
- - words: [read, $sbd, the, riot, act]
- - words: [read, $sth]
- sing:
- - words: [sing, $singable]
- boy:
- - words: [the, boy]
- us:
- - words: [us]
- them:
- - words: [them]
- song:
- - words: [a, song]
=======================================
--- /l3lite/languages/orm.lg Sun Apr 27 18:07:59 2014 UTC
+++ /dev/null
@@ -1,40 +0,0 @@
-name: afaan oromoo
-abbrev: orm
-groups:
- nyaate:
- - words: [$food, nyaate]
- features: [{case: acc}, False]
- beeke:
- - words: [$vb, hin, beeke]
- features: [{tam: cnt}, false, {tam: prs, pol: neg}]
- name: ^godhe_hin^beeku
- - words: [$fact, beeke]
- qurxummii:
- - words: [qurxummii]
- kalluuna:
- - words: [kalluuna]
-forms:
- beeka:
- root: beeke
- features: {tam: prs, pol: aff, prs: 3, num: 0, gen: 0}
- cats: [$vb]
- beeku:
- root: beeke
- features: {tam: prs, pol: neg, prs: 3, num: 0, gen: 0}
- cats: [$vb]
- nyaate:
- root: nyaate
- features: {tam: cnt, prs: 3, num: 0, gen: 0}
- cats: [$vb]
- dinnicha:
- root: dinnicha
- features: {num: 0, case: acc}
- cats: [$food, $thing]
- qurxummii:
- root: qurxummii
- features: {num: 0, case: acc}
- cats: [$food, $animal, $thing]
- kalluuna:
- root: kalluuna
- features: {num: 0, case: acc}
- cats: [$food, $animal, $thing]
=======================================
--- /l3lite/languages/spa.lg Thu May 1 06:39:44 2014 UTC
+++ /dev/null
@@ -1,67 +0,0 @@
-name: español
-abbrev: spa
-
-groups:
- cantar:
- # cantar las cuarenta (with pronoun indirect object)
- - words: [$algnp, cantar, las, cuarenta]
- trans:
- eng:
- - [read_$sbd_the_riot_act,
- {alg: [1, 0, 2, 3],
- agr: [false, {tmp: tns, num: num, prs: prs}, false, false]}]
- # cantar (una canción)
- - words: [cantar, $cantable]
- trans:
- eng:
- - [sing_$singable,
- {alg: [0, 1],
- agr: [{tmp: tns, num: num, prs: prs}, false]}]
- canción:
- - words: [una, canción]
- trans:
- eng:
- - [a_song, {alg: [0, 1]}]
- canciones:
- - words: [canciones]
- les:
- - words: [les]
- trans:
- eng:
- - [them]
- muchacho:
- - words: [el, muchacho]
- trans:
- eng:
-        - [the_boy, {alg: [0, 1]}]
-
-forms:
- canción:
- root: canción
- features: {num: 0, prs: 3}
- cats: [$cantable, $algo]
- canciones:
- root: canción
- features: {num: 1, prs: 3}
- cats: [$cantable, $algo]
- les:
- root: ellos
- features: {num: 1, prs: 3, case: i}
- cats: [$algnp]
- nos:
- root: nosotros
- features: {num: 1, prs: 1, case: i}
- cantó:
- root: cantar
- features: {num: 0, prs: 3, tmp: prt}
- canta:
- root: cantar
- features: {num: 0, prs: 3, tmp: prs}
- muchacho:
- root: muchacho
- features: {num: 0, prs: 3, gen: 0}
- cats: [$algn]
-
-
-
-
=======================================
--- /l3lite/sentence.py Fri May 2 22:53:47 2014 UTC
+++ /dev/null
@@ -1,1089 +0,0 @@
-#
-# L3Lite sentences and how to parse and translate them.
-#
-########################################################################
-#
-# This file is part of the HLTDI L^3 project
-# for parsing, generation, translation, and computer-assisted
-# human translation.
-#
-# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
-#
-# This program is free software: you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation, either version 3 of
-# the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# =========================================================================
-
-# 2014.04.15
-# -- Created.
-# 2014.04.19-20
-# -- Group matching. GInst, GNode, and SNode classes.
-# 2014.04.22
-# -- Solution class.
-# 2014.04.26-7
-# -- Translation class and realization.
-# 2014.04.28
-# -- Variables for sentence analysis.
-# 2014.04.29-30
-# -- Fixed some constraints and initial variable values.
-# 2014.05.01
-# -- Handling source words that aren't "covered" (no group candidate
matches them):
-# variables and source constraints.
-# 2014.05.02
-# -- Uncovered source words: target ordering constraints.
-# Introduced "chunks": target output units that are not connected.
-
-import itertools
-# ui.py loads language, etc.
-from .ui import *
-from .cs import *
-
-class Sentence:
- """A sentence is a list of words (or other lexical tokens) that gets
- assigned a set of variables and constraints that are run during
- parsing or translation."""
-
- id = 0
-
- def __init__(self, raw='', language=None,
- tokens=None, analyses=None,
- nodes=None, groups=None, target=None,
- verbosity=0):
- self.set_id()
- # A string representing the raw sentence
- self.raw = raw
- # Source language: a language object
- self.language = language
- # Target language: a language object or None
- self.target = target
- # A list of SNode objects, one for each token
- self.nodes = nodes or []
- # A list of candidate groups (realized as GInst objects) found
during lexicalization
- self.groups = groups or []
- # Control messages
- self.verbosity = verbosity
- # GNodes in GInsts
- self.gnodes = []
- # A list of constraints to run
- self.constraints = []
- # Root domain store for variables
- self.dstore = DStore(name="S{}".format(self.id))
- # A dict of sentence-level variables
- self.variables = {}
- # Solver to find solutions
- self.solver = Solver(self.constraints, self.dstore,
- description='for group selection',
verbosity=verbosity)
- # Solutions found during parsing
- self.solutions = []
- if verbosity:
- print("Created Sentence object {}".format(self))
-
- def set_id(self):
- self.id = Sentence.id
- Sentence.id += 1
-
- def __repr__(self):
- """Print name."""
- if self.raw:
- return '|| ({}) {} ||'.format(self.id, self.raw)
- else:
- return '|| {} sentence {} ||'.format(self.language, self.id)
-
- def do(self, verbosity=0):
- """If target language, translate. If not, parse."""
-# if verbosity:
- if self.target:
- print("Attempting to translate sentence {}".format(self))
- else:
- print("Attempting to parse sentence {}".format(self))
- if self.initialize(verbosity=verbosity):
- if self.solve(verbosity=verbosity):
- if self.target:
- for solution in self.solutions:
- solution.translate(verbosity=verbosity)
- print("Translations found:")
- for translation in solution.translations:
- translation.display()
-
- def initialize(self, verbosity=0):
- """Things to do before running constraint satisfaction."""
- if verbosity:
- print("Initializing {}".format(self))
- self.tokenize(verbosity=verbosity)
- self.lexicalize(verbosity=verbosity)
- if not self.groups:
- print("No groups found for {}".format(self))
- return False
- else:
- self.create_variables(verbosity=verbosity)
- self.create_constraints(verbosity=verbosity)
- return True
-
- def solve(self, verbosity=0):
- """Run constraints and create a single solution."""
- if verbosity:
- print("Attempting to find solutions for {}".format(self))
- if self.run(verbosity=verbosity):
- self.create_solution(verbosity=verbosity)
- if verbosity:
- print("Found solution {}".format(self.solutions[0]))
- return True
- else:
- if verbosity:
- print("No solution found")
- return False
-
- def tokenize(self, verbosity=0):
- """Segment the sentence string into tokens, analyze them
morphologically,
- and create a SNode object for each."""
- if verbosity:
- print("Tokenizing {}".format(self))
- if not self.nodes:
- # (Otherwise it's already done.)
- # Split at spaces by default (later allow for dedicated
language-specific tokenizers).
- tokens = self.raw.split()
- self.nodes = []
- index = 0
- for token in tokens:
- # Look up token in language.forms
- if token not in self.language.forms:
- # Not found, just use the raw string
- self.nodes.append(SNode(token, index, None, self))
- index += 1
- else:
- # A dict, for unambiguous forms, or a list of dicts,
for ambiguous forms
- formdict = self.language.forms[token]
- if isinstance(formdict, dict):
- # A single entry
- if 'seg' in formdict:
- segs = formdict['seg']
- for seg in segs:
- tok, analysis = seg
- self.nodes.append(SNode(tok, index,
analysis, self))
- index += 1
- else:
- self.nodes.append(SNode(token, index,
formdict, self))
- index += 1
- else:
- # Multiple dicts: ambiguity; let node handle it
- self.nodes.append(SNode(token, index, formdict,
self))
- index += 1
-
- def lexicalize(self, verbosity=0):
- """Find and instantiate all groups that are compatible with the
tokens in the sentence."""
- if verbosity:
- print("Lexicalizing {}".format(self))
- if not self.nodes:
- print("Tokenization must precede lexicalization.")
- return
- candidates = []
- for node in self.nodes:
- # Get keys into lexicon for this node
- keys = {node.token}
- anal = node.analyses
- if anal:
- if isinstance(anal, list):
- for a in anal:
- keys.add(a.get('root'))
- else:
- keys.add(anal.get('root'))
- # Look up candidate groups in lexicon
- for k in keys:
- if k in self.language.groups:
- for group in self.language.groups[k]:
-# print("Checking group {} for {}".format(group,
node))
- # Reject group if it doesn't have a translation in
the target language
- if self.target and not
group.get_translations(self.target.abbrev):
- print("No translation for {}".format(group))
- continue
- candidates.append((node.index, group))
- # Now filter candidates to see if all words are present in the
sentence
- # For each group, save a list of list of sentence token indices
that correspond
- # to the group's words
- groups = []
- for head_i, group in candidates:
- # Matching snodes, along with root and unified features if any
- if verbosity > 1:
- print("Matching group {}".format(group))
- snodes = group.match_nodes(self.nodes, head_i)
- if not snodes:
- # This group is out
- if verbosity > 1:
- print("Failed to match")
- continue
- if verbosity > 1:
- print('Group {} matches snodes {}'.format(group, snodes))
- groups.append((head_i, snodes, group))
- # Create a GInst object and GNodes for each surviving group
- self.groups = [GInst(group, self, head_i, snodes, index) for
index, (head_i, snodes, group) in enumerate(groups)]
- # Assign sentence-level indices to each GNode; store gnodes in list
- sent_index = 0
- for group in self.groups:
- for gnode in group.nodes:
- gnode.sent_index = sent_index
- self.gnodes.append(gnode)
- sent_index += 1
- # Number of GNodes
- self.ngnodes = sent_index
- # Record uncovered snodes
- covered = {}
- for gnode in self.gnodes:
- si = gnode.snode_indices
- for i in si:
- if i not in covered:
- covered[i] = []
- covered[i].append(gnode.sent_index)
- for snode in self.nodes:
- snode.gnodes = covered.get(snode.index, [])
-
- ## Create IVars and (set) Vars with sentence DS as root DS
-
- def ivar(self, name, domain, ess=False):
- self.variables[name] = IVar(name, domain, rootDS=self.dstore,
- essential=ess)
-
- def svar(self, name, lower, upper, lower_card=0, upper_card=MAX,
- ess=False):
- self.variables[name] = Var(name, lower, upper, lower_card,
upper_card,
- essential=ess, rootDS=self.dstore)
-
- def create_variables(self, verbosity=0):
- # All abstract (category) and instance (word or lexeme) gnodes
- catnodes = set()
- instnodes = set()
- for group in self.groups:
- for node in group.nodes:
- if node.cat:
- catnodes.add(node.sent_index)
- else:
- instnodes.add(node.sent_index)
- # Snodes that are merged with catnodes
- merged_snodes = set()
- for gn_index in catnodes:
- gn = self.gnodes[gn_index]
- merged_snodes.update(gn.snode_indices)
-
- self.svar('groups', set(), set(range(len(self.groups))),
- # At least 1, at most all groups
- 1, len(self.groups),
- ess=True)
- self.svar('gnodes', set(), set(range(self.ngnodes)),
- # At least size of smallest group, at most all
- min([len(g.nodes) for g in self.groups]),
- self.ngnodes)
- # covered snodes
- covered_snodes = {sn.index for sn in self.nodes if sn.gnodes}
- self.variables['snodes'] = DetVar('snodes', covered_snodes)
- # Category (abstract) nodes
- self.svar('catgnodes', set(), catnodes)
- # Instance gnodes that are merged with catnodes
- self.svar('merged_gnodes', set(), instnodes, 0, len(catnodes))
- # Snodes that involve merger of gnodes (that have two associated
gnodes)
- self.svar('merged_snodes', set(), merged_snodes, 0, len(catnodes))
- # Position pairs
- pos_pairs = set()
- for group in self.groups:
- pos_pairs.update(group.pos_pairs())
- self.svar('gnode_pos', set(), pos_pairs)
- ## Create variables for SNodes, GInsts, and GNodes
- for snode in self.nodes:
- snode.create_variables()
- for ginst in self.groups:
- ginst.create_variables()
- for gnode in self.gnodes:
- gnode.create_variables()
-
- def create_constraints(self, verbosity=0):
- if verbosity:
- print("Creating constraints for {}".format(self))
- # Relation among abstract, concrete, and all gnodes for each snode
- for snode in self.nodes:
- if snode.gnodes:
- # Only do this for covered snodes
- self.constraints.extend(Union([snode.variables['gnodes'],
- snode.variables['cgnodes'],
-
snode.variables['agnodes']]).constraints)
-        # Constraints involving groups with category (abstract) nodes
- for group in self.groups:
- if group.nanodes > 0:
- # Only do this for groups with abstract nodes
- # For each group, the set of snodes is the union of the
concrete and abstract nodes
-
self.constraints.extend(Union([group.variables['gnodes_pos'],
-
group.variables['agnodes_pos'],
-
group.variables['cgnodes_pos']]).constraints)
- # For each group, the set of groups merged with it +
itself is the union of the
- # set of groups merged with it and the set consisting of
its index
-
self.constraints.extend(Union([group.variables['merged_groups_incl'],
-
group.variables['merged_groups_excl'],
-
DetVar('g{}'.format(group.index), {group.index})]).constraints)
- # The set of merged gnodes for the group is the union of
the merged nodes for all
- # abstract gnodes in the group
-
self.constraints.append(UnionSelection(group.variables['merged_gnodes'],
-
group.variables['agnodes'],
-
[gn.variables['merge_cgn'] for gn in self.gnodes]))
- # The set of groups merged with the group is the union of
groups associated with the
- # gnodes that are merged with the group's abstract nodes
-
self.constraints.append(UnionSelection(group.variables['merged_groups_excl'],
-
group.variables['merged_gnodes'],
-
[DetIVar("gn{}->g".format(gn.sent_index), gn.ginst.index) for gn in
self.gnodes]))
- # The tree under this group consists of the union of the
snodes associated with this group
- # and those merged with it
-
self.constraints.append(UnionSelection(group.variables['tree'],
-
group.variables['merged_groups_incl'],
-
[g.variables['gnodes_pos'] for g in self.groups]))
- for gnode in group.nodes:
- if gnode.cat:
- # The gnodes that this abstract merges with must
be in the set of selected gnodes
-
self.constraints.extend(Inclusion([gnode.variables['merge_cgn'],
-
self.variables['gnodes']]).constraints)
- # The set of category (abstract) nodes used is the union of the
category nodes of the groups used
- self.constraints.append(UnionSelection(self.variables['catgnodes'],
- self.variables['groups'],
- [g.variables['agnodes'] for
g in self.groups]))
- # The set of merged gnodes used is the union of the merged nodes
of the selected category nodes
-
self.constraints.append(UnionSelection(self.variables['merged_gnodes'],
- self.variables['catgnodes'],
- [gn.variables['merge_cgn']
for gn in self.gnodes]))
- # The set of merged gnodes used is the union of the merged gnodes
of all merging snodes
-
self.constraints.append(UnionSelection(self.variables['merged_gnodes'],
-
self.variables['merged_snodes'],
- [sn.variables['mgnodes']
for sn in self.nodes]))
- # The set of category gnodes used is the union of the category
nodes associated with all merged snodes
- self.constraints.append(UnionSelection(self.variables['catgnodes'],
-
self.variables['merged_snodes'],
- [sn.variables['agnodes']
for sn in self.nodes]))
- # The set of category gnodes used is the union of the category
nodes associated with all merged gnodes
- self.constraints.append(UnionSelection(self.variables['catgnodes'],
-
self.variables['merged_gnodes'],
- [gn.variables['merge_agn']
for gn in self.gnodes]))
- # The set of merged snodes used is the union of the snodes
associated with all category nodes used
-
self.constraints.append(UnionSelection(self.variables['merged_snodes'],
- self.variables['catgnodes'],
- [gn.variables['merge_cw']
for gn in self.gnodes]))
- # The set of merged snodes used is the union of the snodes
associated with all merged gnodes
-
self.constraints.append(UnionSelection(self.variables['merged_snodes'],
-
self.variables['merged_gnodes'],
- [gn.variables['merge_aw']
for gn in self.gnodes]))
- # The set of merged gnodes must be a subset of the set of used
gnodes
- self.constraints.extend(Inclusion([self.variables['merged_gnodes'],
-
self.variables['gnodes']]).constraints)
- # All snodes must have distinct category nodes
- self.constraints.extend(Disjoint([sn.variables['agnodes'] for sn
in self.nodes]).constraints)
- # All concrete gnodes must have distinct category nodes
- self.constraints.extend(Disjoint([gn.variables['merge_agn'] for gn
in self.gnodes]).constraints)
- # All position constraints for snodes
-
self.constraints.append(PrecedenceSelection(self.variables['gnode_pos'],
-
[gn.variables['snodes'] for gn in self.gnodes]))
- # Position constraint pairs are the group position pairs for all
groups used
- self.constraints.append(UnionSelection(self.variables['gnode_pos'],
- self.variables['groups'],
-
[DetVar("g{}pos".format(g.index), g.pos_pairs()) for g in self.groups]))
- # Union selection on gnodes for each snode:
- # the union of the snode indices associated with the gnodes of an
snode is the snode's index
- gn2s = [gn.variables['snodes'] for gn in self.gnodes]
- s2gn = [s.variables['gnodes'] for s in self.nodes]
- for snode in self.nodes:
- if snode.gnodes:
- # Only for covered snodes
-
self.constraints.append(UnionSelection(DetVar("sn{}".format(snode.index),
{snode.index}),
-
snode.variables['gnodes'],
- gn2s))
- # Union of all gnodes used for snodes is all gnodes used
- self.constraints.append(UnionSelection(self.variables['gnodes'],
- self.variables['snodes'],
- s2gn))
- # Union of all gnodes for groups used is all gnodes used
- self.constraints.append(UnionSelection(self.variables['gnodes'],
- self.variables['groups'],
- [g.variables['gnodes'] for
g in self.groups]))
- # Union of all snodes for gnodes used is all snodes
- self.constraints.append(UnionSelection(self.variables['snodes'],
- self.variables['gnodes'],
- [gn.variables['snodes'] for
gn in self.gnodes]))
- # Complex union selection by groups on positions of all concrete
gnodes in each selected group
-
self.constraints.append(ComplexUnionSelection(selvar=self.variables['groups'],
-
selvars=[g.variables['cgnodes_pos'] for g in self.groups],
-
seqvars=[s.variables['cgnodes'] for s in self.nodes],
-
mainvars=[g.variables['cgnodes'] for g in self.groups]))
- # Complex union selection by groups on positions of all category
gnodes in each selected group
-
self.constraints.append(ComplexUnionSelection(selvar=self.variables['groups'],
-
selvars=[g.variables['agnodes_pos'] for g in self.groups],
-
seqvars=[s.variables['agnodes'] for s in self.nodes],
-
mainvars=[g.variables['agnodes'] for g in self.groups]))
-# # Complex union selection by groups on positions of all gnodes in
each selected group
-#
self.constraints.append(ComplexUnionSelection(selvar=self.variables['groups'],
-#
selvars=[g.variables['gnodes_pos'] for g in self.groups],
-#
seqvars=[s.variables['gnodes'] for s in self.nodes],
-#
mainvars=[g.variables['gnodes'] for g in self.groups]))
- # Set convexity (projectivity) within each group tree
-
self.constraints.append(ComplexSetConvexity(self.variables['groups'],
- [g.variables['tree']
for g in self.groups]))
-
- def run(self, verbosity=0):
- """Run constraint satisfaction on constraints, for now without
search if
- no solution is found."""
- self.solver.run(verbosity=verbosity)
- if verbosity:
- print("Solver status after run: {}".format(self.solver.status))
- return self.solver.status
-
- def create_solution(self, dstore=None, verbosity=0):
- """Assuming essential variables are determined in a domain store,
make a Solution object."""
- dstore = dstore or self.dstore
- # Get the indices of the selected groups
- groups = self.variables['groups'].get_value(dstore=dstore)
- ginsts = [self.groups[g] for g in groups]
- # Get the indices of the GNodes for each SNode
- s2gnodes = []
- for node in self.nodes:
- gnodes = node.variables['gnodes'].get_value(dstore=dstore)
- s2gnodes.append(gnodes)
- self.solutions.append(Solution(self, ginsts, s2gnodes,
- len(self.solutions)))
-
-class SNode:
- """Sentence token and its associated analyses and variables."""
-
- def __init__(self, token, index, analyses, sentence):
- # Raw form in sentence (possibly result of segmentation)
- self.token = token
- # Position in sentence
- self.index = index
- # List of analyses
- if analyses and not isinstance(analyses, list):
- analyses = [analyses]
- self.analyses = analyses
- # Back pointer to sentence
- self.sentence = sentence
- # We'll need these for multiple matchings
- self.cats = self.get_cats()
- # Indices of candidate gnodes for this snode found during
lexicalization
- self.gnodes = None
- # Dict of variables specific to this SNode
- self.variables = {}
- ## Tokens in target language for this SNode
- self.translations = []
-
- def __repr__(self):
- """Print name."""
- return "*{}:{}".format(self.token, self.index)
-
- ## Create IVars and (set) Vars with sentence DS as root DS
-
- def ivar(self, key, name, domain, ess=False):
- self.variables[key] = IVar(name, domain,
rootDS=self.sentence.dstore,
- essential=ess)
-
- def svar(self, key, name, lower, upper, lower_card=0, upper_card=MAX,
- ess=False):
- self.variables[key] = Var(name, lower, upper, lower_card,
upper_card,
- rootDS=self.sentence.dstore,
essential=ess)
-
- def create_variables(self, verbosity=0):
- if not self.gnodes:
- # Nothing matched this snode; all variables empty
- self.variables['gnodes'] = EMPTY
- self.variables['cgnodes'] = EMPTY
- self.variables['agnodes'] = EMPTY
- self.variables['mgnodes'] = EMPTY
- else:
- # GNodes associated with this SNode: 0, 1, or 2
- self.svar('gnodes', "w{}->gn".format(self.index), set(),
- set(range(self.sentence.ngnodes)),
- 0, 2, ess=True)
- # Concrete GNodes associated with this SNode: must be 1
- self.svar('cgnodes', "w{}->cgn".format(self.index), set(),
- {gn.sent_index for gn in self.sentence.gnodes if not
gn.cat},
- 1, 1)
- # Abstract GNodes associated with this SNode: 0 or 1
- self.svar('agnodes', "w{}->agn".format(self.index), set(),
- {gn.sent_index for gn in self.sentence.gnodes if
gn.cat},
- 0, 1)
- # Merged concrete GNodes associated with this SNode: 0 or 1
- self.svar('mgnodes', "w{}->mgn".format(self.index), set(),
- {gn.sent_index for gn in self.sentence.gnodes if not
gn.cat},
- 0, 1)
-
- def get_cats(self):
- """The set of categories for the node's token, or None."""
- if not self.analyses:
- return None
- cats = set()
- for analysis in self.analyses:
- if 'cats' in analysis:
- cats.update(analysis['cats'])
- return cats
-
- def match(self, item, features, verbosity=0):
- """Does this node match the group item (word, lexeme, category) and
- any features associated with it?"""
- if verbosity:
- print(' SNode {} with features {} trying to match item {}
with features {}'.format(self, self.analyses, item, features))
- # If item is a category, don't bother looking at token
- if Entry.is_cat(item):
- if verbosity:
- print(' Cat item, looking in {}'.format(self.cats))
- if self.cats and item in self.cats:
-# print(" Token {} is in cat {}".format(self.token, item))
- if not self.analyses or not features:
- # Match; failure would be False
- return None
- else:
- for analysis in self.analyses:
- node_features = analysis.get('features')
- if node_features:
- u_features = node_features.unify(features)
- if u_features != 'fail':
- return analysis.get('root'), u_features
-# print(" Matching group features {} and sentence
features {}".format(features, node_features))
-# if node_features and
node_features.unify(features) != 'fail':
-# return True
- # None succeeded
- return False
- elif self.token == item:
- # item matches this node's token; features are irrelevant
- return None
- elif self.analyses:
- # Check each combination of root and analysis features
- for analysis in self.analyses:
- root = analysis.get('root', '')
- node_features = analysis.get('features')
-# print(" SNode features {}".format(node_features))
- if root == item:
- if not features:
- return root, node_features
-# return True
- elif not node_features:
- return root, features
-# return True
- else:
- u_features = node_features.unify(features)
- if u_features != 'fail':
- return root, u_features
-# elif node_features.unify(features) != 'fail':
-# return True
- return False
-
-class GInst:
-
- """Instantiation of a group; holds variables and GNode objects."""
-
- def __init__(self, group, sentence, head_index, snode_indices, index):
- # The Group object that this "instantiates"
- self.group = group
- self.sentence = sentence
- self.target = sentence.target
- # Index of group within the sentence
- self.index = index
- # Index of SNode associated with group head
- self.head_index = head_index
- # List of GNodes
- self.nodes = [GNode(self, index, indices) for index, indices in
enumerate(snode_indices)]
- # Dict of variables specific to this group
- self.variables = {}
- # List of target language groups
- self.translations = []
- self.ngnodes = len(self.nodes)
- # Number of abstract nodes
- self.nanodes = len([n for n in self.nodes if n.cat])
- # Number of concrete nodes
- self.ncgnodes = self.ngnodes - self.nanodes
-
- def __repr__(self):
- return '<<{}:{}>>'.format(self.group.name, self.group.id)
-
- def pos_pairs(self):
- """Return position constraint pairs for gnodes in the group."""
- gnode_pos = [gn.sent_index for gn in self.nodes]
- return set(itertools.combinations(gnode_pos, 2))
-
- ## Create IVars and (set) Vars with sentence DS as root DS
-
- def ivar(self, key, name, domain, ess=False):
- self.variables[key] = IVar(name, domain,
rootDS=self.sentence.dstore,
- essential=ess)
-
- def svar(self, key, name, lower, upper, lower_card=0, upper_card=MAX,
- ess=False):
- self.variables[key] = Var(name, lower, upper, lower_card,
upper_card,
- rootDS=self.sentence.dstore,
- essential=ess)
-
- def create_variables(self, verbosity=0):
- ngroups = len(self.sentence.groups)
- nsnodes = len(self.sentence.nodes)
- # GNode indices for this GInst (determined)
- self.variables['gnodes'] =
DetVar('g{}->gnodes'.format(self.index), {gn.sent_index for gn in
self.nodes})
- # Abstract GNode indices for GInst (determined)
- if self.nanodes:
- self.variables['agnodes'] =
DetVar('g{}->agnodes'.format(self.index), {gn.sent_index for gn in
self.nodes if gn.cat})
- # Concrete GNode indices for GInst (determined)
- self.variables['cgnodes'] =
DetVar('g{}->cgnodes'.format(self.index), {gn.sent_index for gn in
self.nodes if not gn.cat})
- else:
- self.variables['agnodes'] = EMPTY
- self.variables['cgnodes'] = self.variables['gnodes']
- # SNode positions of GNodes for this GInst
- self.svar('gnodes_pos', 'g{}->gnodes_pos'.format(self.index),
- set(), set(range(nsnodes)), self.ngnodes, self.ngnodes)
- # SNode positions of abstract GNodes for this GInst
- if self.nanodes == 0:
- # No abstract nodes
- self.variables['agnodes_pos'] = EMPTY
- # SNode positions of concrete GNodes for this GInst
- self.variables['cgnodes_pos'] = self.variables['gnodes_pos']
- else:
- # Position for each abstract node in the group
- self.svar('agnodes_pos', 'g{}->agnodes_pos'.format(self.index),
- set(), set(range(nsnodes)), self.nanodes,
self.nanodes)
- # Position for each concrete node in the group
- self.svar('cgnodes_pos', 'g{}->cgnodes_pos'.format(self.index),
- set(), set(range(nsnodes)), self.ncgnodes,
self.ncgnodes)
- # Other GInsts merged with this one, excluding and including itself
- if self.nanodes == 0:
- # No abstract nodes, so this is a determined variable with one
value (its own index)
- self.variables['merged_groups_incl'] =
DetVar('g{}->mgroups_in'.format(self.index), {self.index})
- self.variables['merged_groups_excl'] = EMPTY
- self.variables['merged_gnodes'] = EMPTY
- else:
-
self.svar('merged_groups_incl', 'g{}->mgroups_in'.format(self.index),
- {self.index}, set(range(ngroups)), 1, self.nanodes+1)
-
self.svar('merged_groups_excl', 'g{}->mgroups_ex'.format(self.index),
- set(), set(range(ngroups)) - {self.index}, 0,
self.nanodes)
- # Set of all gnodes that are merged with abstract gnodes in
this group
- # upper bound is all gnodes not in this group
- self.svar('merged_gnodes', 'g{}->mgnodes'.format(self.index),
- set(), set(range(len(self.sentence.gnodes))) -
{gn.sent_index for gn in self.nodes})
- # Trees under GInst head (including self)
- if self.nanodes == 0:
- # No abstract gnodes, so same as gnodes
- self.variables['tree'] = self.variables['gnodes_pos']
- else:
- self.svar('tree', 'g{}->tree'.format(self.index),
- # at least as long as the number of self's nodes
- set(), set(range(nsnodes)), self.ngnodes, nsnodes)
-
- def set_translations(self, verbosity=0):
- """Find the translations of the group in the target language."""
- translations = self.group.get_translations(self.target.abbrev,
False)
- # If alignments are missing, add default alignment
- for i, t in enumerate(translations):
- if len(t) == 1:
- translations[i] = [t[0], {'alg':
list(range(len(self.nodes)))}]
-# print("Translations for {}: {}".format(self, translations))
- ntokens = len(self.group.tokens)
- for tgroup, alignment in translations:
- if isinstance(tgroup, str):
- # First find the target Group object
- tgroup = self.target.groupnames[tgroup]
- # Make any TNodes required
- nttokens = len(tgroup.tokens)
- tnodes = []
- if nttokens > ntokens:
- # Target group has more nodes than source group.
- # Indices of groups that are not empty.
- full_t_indices = set(alignment['alg'])
- empty_t_indices = set(range(nttokens)) - full_t_indices
- for i in empty_t_indices:
- empty_t_token = tgroup.tokens[i]
- empty_t_feats = tgroup.features[i] if tgroup.features
else None
- tnodes.append(TNode(empty_t_token, empty_t_feats,
self, i))
- # Deal with individual gnodes in the group
- gnodes = []
- for gn_index, gnode in enumerate(self.nodes):
- # Align gnodes with target tokens and features
- tokens = tgroup.tokens
- features = tgroup.features
- targ_index = alignment['alg'][gn_index]
- if targ_index < 0:
- # This means there's no target language token
- continue
- agrs = alignment['agr'][gn_index] if 'agr' in alignment
else None
- token = tokens[targ_index]
- feats = features[targ_index] if features else None
- gnodes.append((gnode, token, feats, agrs, targ_index))
- self.translations.append((tgroup, gnodes, tnodes))
-
-class GNode:
-
- """Representation of a single node (word, position) within a GInst
object."""
-
- def __init__(self, ginst, index, snodes):
- self.ginst = ginst
- self.index = index
- self.sentence = ginst.sentence
- self.snode_indices = [s[0] for s in snodes]
- self.snode_anal = [s[1] for s in snodes]
- # Whether this is the head of the group
- self.head = index == ginst.group.head_index
- # Group word, etc. associated with this node
- self.token = ginst.group.tokens[index]
- # Whether the associated token is abstract (a category)
- self.cat = Entry.is_cat(self.token)
- # Features associated with this group node
- groupfeats = ginst.group.features
- if groupfeats:
- self.features = groupfeats[index]
- else:
- self.features = None
- self.variables = {}
- # List of target-language token and features associated with this
gnode
-# self.translations = []
-
- def __repr__(self):
- return "{}|{}".format(self.ginst, self.token)
-
- ## Create IVars and (set) Vars with sentence DS as root DS
-
- def ivar(self, key, name, domain, ess=False):
- self.variables[key] = IVar(name, domain,
rootDS=self.sentence.dstore,
- essential=ess)
-
- def svar(self, key, name, lower, upper, lower_card=0, upper_card=MAX,
- ess=False):
- self.variables[key] = Var(name, lower, upper, lower_card,
upper_card,
- rootDS=self.sentence.dstore,
- essential=ess)
-
- def create_variables(self, verbosity=0):
- nsnodes = len(self.sentence.nodes)
- # SNode index for this GNode
- self.ivar('snodes', "gn{}->w".format(self.sent_index),
set(self.snode_indices))
- if self.cat:
- # Concrete nodes merged with this abstract node
- self.svar('merge_cgn', 'gn{}_cgmerge'.format(self.sent_index),
- set(), {gn.sent_index for gn in self.sentence.gnodes
if not gn.cat},
- 0, 1)
- self.svar('merge_cw', 'gn{}_cwmerge'.format(self.sent_index),
- set(), set(range(nsnodes)),
- 0, 1)
- self.variables['merge_agn'] = EMPTY
- self.variables['merge_aw'] = EMPTY
- else:
- # Abstract nodes merged with this concrete node
- self.svar('merge_agn', 'gn{}_agmerge'.format(self.sent_index),
- # indices of all abstract nodes
- set(), {gn.sent_index for gn in self.sentence.gnodes
if gn.cat},
- 0, 1)
- self.svar('merge_aw', 'gn{}_awmerge'.format(self.sent_index),
- set(), set(range(nsnodes)),
- 0, 1)
- self.variables['merge_cgn'] = EMPTY
- self.variables['merge_cw'] = EMPTY
-
-class TNode:
-
- """Representation of a node within a target language group that doesn't
- have a corresponding node in the source language group that it's the
- translation of."""
-
- def __init__(self, token, features, ginst, index):
- self.token = token
- self.features = features
- self.ginst = ginst
- self.sentence = ginst.sentence
- self.index = index
-
- def generate(self, verbosity=0):
- """Generate forms for the TNode."""
- if self.features:
- return self.sentence.target.generate(self.token, self.features)
- else:
- return [self.token]
-
- def __repr__(self):
- return "~{}|{}".format(self.ginst, self.token)
-
-class Solution:
-
- """A non-conflicting set of groups for a sentence, at most one instance
- GNode for each sentence token, exactly one sentence token for each
obligatory
 - GNode in a selected group. Created when a complete variable assignment
 - is found for a sentence."""
-
- def __init__(self, sentence, ginsts, s2gnodes, index):
- self.sentence = sentence
- # List of sets of gnode indices
- self.s2gnodes = s2gnodes
- self.ginsts = ginsts
- self.index = index
- # A list of pairs for each snode: (gnodes, features)
- self.snodes = []
- # List of Translation objects; multiple translations are possible
- # for a given solution because of multiple translations for groups
- self.translations = []
-
- def __repr__(self):
- return "|< {} >|({})".format(self.sentence.raw, self.index)
-
- def translate(self, verbosity=0):
- """Do everything you need to create the translation."""
- self.merge_nodes(verbosity=verbosity)
- for ginst in self.ginsts:
- ginst.set_translations(verbosity=verbosity)
- self.make_translations(verbosity=verbosity)
-
- def make_translations(self, verbosity=0):
- """Combine GInsts for each translation in translation products, and
- separate gnodes into a dict for each translation."""
- if verbosity:
- print("Making translations for {}".format(self))
- translations = itertools.product(*[g.translations for g in
self.ginsts])
- for index, translation in enumerate(translations):
- t = Translation(self, translation, index, verbosity=verbosity)
- t.initialize(verbosity=verbosity)
- t.realize(verbosity=verbosity)
- self.translations.append(t)
-
- def merge_nodes(self, verbosity=0):
- """Merge the source features of cat and inst GNodes associated
with each SNode."""
- if verbosity:
- print("Merging target nodes for {}".format(self))
- for snode, gn_indices in zip(self.sentence.nodes, self.s2gnodes):
- # gn_indices is either one or two ints indexing gnodes in
self.gnodes
- gnodes = [self.sentence.gnodes[index] for index in gn_indices]
- features = []
- for gnode in gnodes:
-# print("gnode {}, snode_anal {}".format(gnode,
gnode.snode_anal))
- snode_indices = gnode.snode_indices
- snode_index = snode_indices.index(snode.index)
- snode_anal = gnode.snode_anal[snode_index]
- if snode_anal:
-# print("snode_anal {}".format(snode_anal))
- features.append(snode_anal[1])
- # Could this fail??
- features = Features.unify_all(features)
- self.snodes.append((gnodes, features))
-
-class Translation:
- """Representation of a single translation for an input sentence.
- Multiple translations are possible with a single Solution."""
-
- def __init__(self, solution, attribs, index, verbosity=0):
- self.solution = solution
- self.index = index
- self.sentence = solution.sentence
- self.verbosity = verbosity
- # Create GNode dict and list of target group, gnodes and tnodes
- # from attributes
- self.gnode_dict = {}
- self.groups_tnodes = []
- for tgroup, tgnodes, tnodes in attribs:
- for tgnode, tokens, feats, agrs, t_index in tgnodes:
- self.gnode_dict[tgnode] = (tgroup, tokens, feats, agrs,
t_index)
- self.groups_tnodes.append((tgroup, tnodes))
- # form list / order constraint pairs for each sentence position
- self.nodes = []
- # Ordered units: merged groups or uncovered words
- self.chunks = []
- # pairs of node indices representing order constraints
- self.order_pairs = []
- # Root domain store for variables
- self.dstore = DStore(name="T{}".format(self.index))
- # Order variables for each node
- self.variables = []
- # Order and disjunction constraints
- self.constraints = []
- # Translation needs a solver to figure out positions of words
- self.solver = Solver(self.constraints, self.dstore,
- description='for target realization',
- verbosity=verbosity)
- # Final output
- self.output = None
-
- def __repr__(self):
- return "{}[{}] ->".format(self.solution, self.index)
-
- def display(self):
- print("{} {}".format(self, self.out_string()))
-
- def out_string(self):
- '''Convert output to a string for pretty printing.'''
- l = []
- for word_list in self.output:
- if len(word_list) == 1:
- l.append(word_list[0])
- else:
- l.append('|'.join(word_list))
- return ' '.join(l)
-
- def initialize(self, verbosity=0):
- """Set up everything needed to run the constraints and generate
the translation."""
- if verbosity:
- print("Initializing translation {}".format(self))
- self.build(verbosity=verbosity)
- self.set_chunks(verbosity=verbosity)
- self.make_order_pairs(verbosity=verbosity)
- self.create_variables(verbosity=verbosity)
- self.create_constraints(verbosity=verbosity)
-
- def build(self, verbosity=0):
- """Unify translation features for merged nodes, map agr features
from source to target,
- generate surface target forms from resulting roots and features."""
- if verbosity:
- print('Building {}'.format(self))
- tginsts, tgnodes, trans_index = self.groups_tnodes,
self.gnode_dict, self.index
- # Figure out the target forms for each snode
- for snode, (gnodes, features) in zip(self.sentence.nodes,
self.solution.snodes):
- if not gnodes:
- # snode is not covered by any group
- self.nodes.append(([snode.token], []))
- else:
- t_indices = []
- if len(gnodes) > 1:
- # There are two gnodes for this snode; only the
concrete node
- # can have translations
- gn0, gn1 = tgnodes[gnodes[0]], tgnodes[gnodes[1]]
- tgroups, tokens, targ_feats, agrs, t_index = zip(gn0,
gn1)
- token = False
- i = 0
- # Find the token that's not a cat
- while not token:
- t = tokens[i]
- if not Entry.is_cat(t):
- token = t
- i += 1
- targ_feats = Features.unify_all(targ_feats)
- # Merge the agreements
- agrs = Translation.combine_agrs(agrs)
- if len(tgroups[0].tokens) > 1:
- t_indices.append((tgroups[0], gn0[-1]))
- if len(tgroups[1].tokens) > 1:
- t_indices.append((tgroups[1], gn1[-1]))
-# print('tgroups {}, token {}, t_indices
{}'.format(tgroups, token, t_indices))
- else:
- gnode = gnodes[0]
- tgroup, token, targ_feats, agrs, t_index =
tgnodes[gnode]
- if len(tgroup.tokens) > 1:
- t_indices.append((tgroup, t_index))
-# print('tgroup {}, token {}, t_index
{}'.format(tgroup, token, t_indices))
-
- # Make target and source features agree as required
- if not targ_feats:
- targ_feats = Features({})
- if agrs:
-# print("Feature agree, targ feats {}, agrs
{}".format(targ_feats, agrs))
- features.agree(targ_feats, agrs)
- # Generate target forms for this SNode
- gen = self.sentence.target.generate(token, targ_feats)
- if verbosity > 1:
- print(" Generating node form {}/{}: {}".format(token,
targ_feats, gen))
- self.nodes.append((gen, t_indices))
- # Add TNode elements
- tgnode_elements = []
- for ginst, tnodes in tginsts:
- if tnodes:
- for tnode in tnodes:
- forms = tnode.generate()
- if verbosity:
- print(' Generating tnode form {}/{}:
{}'.format(tnode.token, tnode.features, forms))
- index = [(ginst, tnode.index)]
- self.nodes.append((forms, index))
-
- def set_chunks(self, verbosity=0):
- """Find output chunks: a list of sets of snode indices."""
- chunk_attribs = []
- for index, (tokens, constraints) in enumerate(self.nodes):
- # Is this an uncovered node/token
- if not constraints:
- chunk_attribs.append((tokens[0], {index}))
***The diff for this file has been truncated for email.***
=======================================
--- /l3lite/ui.py Sun Apr 20 07:07:10 2014 UTC
+++ /dev/null
@@ -1,206 +0,0 @@
-#
-# L3Lite UI: initial attempt at a user interface for creating languages
-#
-########################################################################
-#
-# This file is part of the HLTDI L^3 project
-# for parsing, generation, translation, and computer-assisted
-# human translation.
-#
-# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
-#
-# This program is free software: you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation, either version 3 of
-# the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# =========================================================================
-
-# 2014.02.15
-# -- Created
-# 2014.03.04
-# -- UI class
-# 2014.03.18
-# -- Adding groups
-
-from .language import *
-import os, sys
-
-class UI:
- """Normally only one of these so doesn't have to be a class. Later a
subclass
- of tkinter Frame?"""
-
- # Editing the grammar/lexicon
- edit_mode = 0
- # Parsing and translating
- proc_mode = 1
-
- def __init__(self):
- self.languages = {}
- self.mode = UI.edit_mode
-
- @staticmethod
- def yes(response):
- return not response or response[0].lower() == 'y'
-
- def load_language(self):
- abbrev = input("Give abbreviation for language.\n>> ")
- path = os.path.join(LANGUAGE_DIR, abbrev + '.lg')
- try:
- language = Language.read(path)
- self.languages[abbrev] = language
- return language
- except IOError:
- print("That language doesn't seem to exist.")
- return
-
- def quit(self):
- """Quit the UI (and L3Lite)."""
- response = input("Are you sure you want to quit L3Lite? ")
- if UI.yes(response):
- self.write_languages()
- sys.exit()
-
- def write_languages():
- """Write the languages the user wants to save."""
- for language in self.languages.values():
- if language.changed:
- response = input("{} has been changed;
save?\n>> ".format(language.name))
- if UI.yes(response):
- language.write(LANGUAGE_DIR)
-
- def add_word(self, language):
- word = input("Write the word to be added to the lexicon.\n>> ")
- if word in language.words:
- response = input("There's already a word with that form in the
lexicon; add another? ")
- if UI.yes(response):
- return self.add_word1(word, language)
- return
- else:
- return self.add_word1(word, language)
-
- def add_word1(self, word, language):
- cls = None
- response = input("Do you want to assign a class to the word? ")
- if UI.yes(response):
- class_names = list(language.classes.keys())
- cls = input("Choose from these
classes:\n{}\n>> ".format(' '.join(class_names)))
- return language.add_word(word, cls=cls)
-
- def add_class(self, language):
- name = input("Write the name of the class to be added to the
lexicon.\n>> ")
- if name in self.language.classes:
- response = input("There's already a class with that name in
the lexicon; add a class with a different name? ")
- if UI.yes(response):
- return self.add_class1(name, language)
- return
- else:
- return self.add_class1(name, language)
-
- def add_class1(self, name, language):
- return language.add_class(name)
-
- def add_group(self, language):
- """Get the words that will be in the group. make_group() creates
the group."""
- words = input(
- """Write the words, lexemes, or classes in the group in their
typical order.
-Precede any lexemes with % and any classes with $.
->> """)
- words = words.split()
- response = input("Are these the words you want in the
group?\n{}\n".format(', '.join(words)))
- if UI.yes(response):
- return self.make_group(language, words)
- else:
- return self.add_group(language)
-
- def make_group(self, language, words, word_string=''):
- if not word_string:
- word_list = []
- for i, w in enumerate(words):
- word_list.append("[{}] {}".format(i+1, w))
- word_string = '\n'.join(word_list)
- head_index = input("Give the number of the word or lexeme that is
the head of the group.\n{}\n>> ".format(word_string))
- if not head_index.isdigit():
- print("You need to give a number between 1 and
{}".format(len(words)))
- return self.make_group(language, words,
word_string=word_string)
- else:
- head_index = int(head_index)
- if head_index > len(words):
- print("You need to give a number between 1 and
{}".format(len(words)))
- return self.make_group(language, words,
word_string=word_string)
- else:
- head_index = head_index - 1
- head = words[head_index]
- name = '_'.join(words)
- print("OK, the head is '{}'".format(head))
- print("Creating group {} with head {}".format(name, head))
- group = language.add_group(name, head,
head_lexeme=head.startswith(LEXEME_PRE), head_order=head_index)
- # A dictionary to associate order of words within the
group with their IDs (indices).
- order2index = {head_index: 0}
- for index, word in enumerate(words):
- if word == head:
- continue
- word_id = group.add_word(word, order=index)
- order2index[index] = word_id
- response = input("Create dependencies among words?\n")
- if response:
- return self.add_group_deps(group, word_string,
order2index=order2index)
- else:
- return self.add_group_deps(group, word_string,
first=False, finished=True, order2index=order2index)
-
- def add_group_deps(self, group, word_string, first=True,
finished=False, order2index=None):
- if not first:
- if not finished:
- response = input("Finished with dependencies? ")
- if UI.yes(response):
- finished = True
- if finished:
- for index, (lex, feats) in group.words.items():
- # For each word in the group, make sure it's either
- # the group head or that it has a mother within the
- # group.
- if index != 0 and 'm' not in feats:
- print("Making word {} a daughter of head with
default dependency".format(feats['o'] + 1))
- group.add_dep(0, index)
- return group
- else:
- return self.add_group_dep(group, word_string,
order2index=order2index)
- else:
- return self.add_group_dep(group, word_string,
order2index=order2index)
-
- def add_group_dep(self, group, word_string, src_index=None,
dest_index=None, order2index=None):
- if src_index is None:
- src_index = input("Give the index of the source word for a
dependency.\n{}\n>> ".format(word_string))
- if not src_index.isdigit() or int(src_index) >
len(group.words):
- print("You need to give a number between 1 and
{}".format(len(group.words)))
- return self.add_group_dep(group, word_string,
order2index=order2index)
- else:
- src_index = int(src_index) - 1
- if dest_index is None:
- dest_index = input("Give the index of the destination
word for the dependency.\n{}\n>> ".format(word_string))
- if not dest_index.isdigit() or int(dest_index) >
len(group.words):
- print("You need to give a number between 1 and
{}".format(len(group.words)))
- return self.add_group_dep(group, word_string,
src_index=src_index, order2index=order2index)
- else:
- dest_index = int(dest_index) - 1
- dep = input("If you want a particular dependency
type, enter it.\n>> ")
- if not dep:
- dep = Entry.dflt_dep
- response = input("OK to create dependency of type
{} from word {} to word {}?\n".format(dep, src_index + 1, dest_index + 1))
- if UI.yes(response):
- s = order2index[src_index]
- d = order2index[dest_index]
- # Actually create the dependency
- group.add_dep(s, d, dep=dep)
- return self.add_group_deps(group, word_string,
first=False, order2index=order2index)
-
-
-
=======================================
--- /l3lite/variable.py Fri May 2 22:53:47 2014 UTC
+++ /dev/null
@@ -1,651 +0,0 @@
-#
-# L3Lite variables and domain stores: required for constraint
satisfaction.
-#
-########################################################################
-#
-# This file is part of the HLTDI L^3 project
-# for parsing, generation, translation, and computer-assisted
-# human translation.
-#
-# Copyright (C) 2014, HLTDI <gas...@cs.indiana.edu>
-#
-# This program is free software: you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation, either version 3 of
-# the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#
-# =========================================================================
-
-# 2014.02.14
-# -- Created. Copied from l3xdg/variable.py.
-# 2014.03.26
-# -- One variable class (SVar from l3xdg).
-
-# Maximum number of values for a variable.
-MAX = 200
-# Maximum set of integers
-ALL = set(range(MAX))
-
-class DStore:
- """Domain store holding domains for variables. (Really the domains are
held in
- dicts kept by the variables.)"""
-
- def __init__(self, name='', level=0, problem=None, parent=None):
- """This store is a strengthening of parent store if there is
one."""
- self.problem = problem
- self.parent = parent
- self.children = []
- self.name = name
- self.level = level
- # Undetermined variables
- self.undetermined = []
- # Essential undetermined variables
- self.ess_undet = []
-
- def __repr__(self):
- return '@ {}/{}'.format(self.name, self.level)
-
- def is_determined(self, essential=True, verbosity=0):
- """Are all variables in dstore determined that need to be
determined?"""
- if essential:
- if self.ess_undet:
- if verbosity:
- print('{} has {} undetermined variables'.format(self,
len(self.ess_undet)))
- return False
- else:
- return True
- elif self.undetermined:
- if verbosity:
- print('{} has {} undetermined variables'.format(self,
len(self.undetermined)))
- return False
- return True
-
- def clone(self, constraint=None, name='', project=False, verbosity=0):
- """Create a new dstore by applying the basic constraint
- to the bindings in this store."""
- new_store = DStore(name=name or self.name, level=self.level+1,
- problem=self.problem, parent=self)
- self.children.append(new_store)
- new_store.undetermined = self.undetermined[:]
- new_store.ess_undet = self.ess_undet[:]
- constraint.infer(dstore=new_store, verbosity=0, tracevar=[])
- for var in constraint.variables:
- # See if the new variable(?s) is now determined
- var.determined(dstore=new_store, verbosity=0)
- return new_store
-
-DS0 = DStore(name='top')
-
-class Var:
-
- # Threshold for "peripheral" variables
- weight_thresh = .5
-
- def __init__(self, name,
- lower_domain=None, upper_domain=None,
- lower_card=0, upper_card=MAX,
- problem=None, dstores=None, rootDS=None,
- constraint=None,
- # Whether a complete solution depends on a single value
for this variable
- essential=True,
- # Vars with low weights are "peripheral".
- weight=1):
- self.name = name
- self.problem = problem
-# if problem:
-# self.problem.add_variable(self)
- self.constraints = [constraint] if constraint else []
- self.essential = essential
- self.value = None
- # Normally initialize with a top-level domain store
- self.rootDS = rootDS or DS0
- # Values of this variable in different domain stores
- self.dstores = dstores or {self.rootDS: {}}
- # Add the variable to the list of undetermined variables for
- # the dstore
- self.rootDS.undetermined.append(self)
- if essential:
- self.rootDS.ess_undet.append(self)
- self.weight = weight
- if lower_domain != None:
- self.lower_domain = lower_domain
- else:
- self.lower_domain = set()
- if upper_domain != None:
- self.upper_domain = upper_domain
- else:
- self.upper_domain = ALL.copy()
- self.init_lower_card = max(lower_card, len(self.lower_domain))
- self.init_upper_card = min(upper_card, len(self.upper_domain))
- self.max = MAX
- self.init_values(dstore=self.rootDS)
-
- def __repr__(self):
- return '${}'.format(self.name)
-
- # Initializing bounds
-
- def init_values(self, dstore=None):
- self.set_lower(self.lower_domain, dstore=dstore)
- self.set_upper(self.upper_domain, dstore=dstore)
- self.set_lower_card(self.init_lower_card, dstore=dstore)
- self.set_upper_card(self.init_upper_card, dstore=dstore)
- self.set_value(None, dstore=dstore)
-
- def set_lower(self, lower, dstore=None):
- self.set(dstore, 'lower', lower)
-
- def set_upper(self, upper, dstore=None):
- self.set(dstore, 'upper', upper)
-
- def set_lower_card(self, lower_card, dstore=None):
- self.set(dstore, 'lower_card', lower_card)
-
- def set_upper_card(self, upper_card, dstore=None):
- self.set(dstore, 'upper_card', upper_card)
-
- def get_name(self):
- '''Function used in sorting lists of variables.'''
- return self.name
-
- def get_dstore(self, dstore):
- """Returns the dictionary of value and domain(s) for dstore."""
- dstore = dstore or self.rootDS
- return self.dstores.get(dstore, {})
-
- def add_dstore(self, dstore):
- """Adds a domain store to the dstores dict."""
- self.dstores[dstore] = {}
-
- def set(self, dstore, feature, value):
- """Sets feature to be value in dstore, creating a dict for dstore
if one doesn't exist."""
- dstore = dstore or self.rootDS
- dsdict = self.dstores.get(dstore, None)
- if dsdict == None:
- dsdict = {'value': None}
- self.dstores[dstore] = dsdict
- dsdict[feature] = value
-
- def set_value(self, value, dstore=None):
- """Sets the value of the variable in dstore."""
- self.set(dstore, 'value', value)
-
- def is_determined(self, dstore=None):
- """Is the variable already determined?"""
- return self.get_value(dstore=dstore) is not None
-
- def all_equal(self, variables, dstore=None):
- """Do all of these variables have the same value as this in
dstore?"""
- return all([self.equals(var, dstore=dstore) for var in variables])
-
- def equals(self, var, dstore=None):
- """Does this variable have the same value as var in dstore?
- """
- value = self.get_value(dstore=dstore)
- if value != None:
- var_val = var.get_value(dstore=dstore)
- if var_val == None:
- return False
- if var.get_lower_card(dstore=dstore) > 1:
- return False
-# var_val = list(var_val)[0] if var_val else ()
- if value == var_val:
- return True
- return False
-
- ## How constraints on a variable can fail
-
- def bound_fail(self, dstore=None):
- """Fail if the lower bound includes any elements not in the upper
bound."""
- return self.get_lower(dstore=dstore) -
self.get_upper(dstore=dstore)
-
- def card_fail(self, dstore=None):
- """Fail if the lower cardinality bound is greater than the upper
cardinality bound."""
- return self.get_lower_card(dstore=dstore) >
self.get_upper_card(dstore=dstore)
-
- def upper_bound_card_fail(self, dstore=None):
- """Fail if the length of upper bound < lower card."""
- return len(self.get_upper(dstore=dstore)) <
self.get_lower_card(dstore=dstore)
-
- def lower_bound_card_fail(self, dstore=None):
- """Fail if length of lower bound > upper card."""
- return len(self.get_lower(dstore=dstore)) >
self.get_upper_card(dstore=dstore)
-
- def fail(self, dstore=None):
- """Fail in one of three ways."""
- return self.bound_fail(dstore=dstore) or
self.card_fail(dstore=dstore)
-# or self.bound_card_fail(dstore=dstore)
-
- ## Getters
-
- def get(self, dstore, feature, default=None):
- """Returns a value for feature associated with dstore, recursively
- checking dstore's parent is nothing is found."""
- dstore_dict = self.dstores.get(dstore, {})
- x = dstore_dict.get(feature, None)
- if x != None:
- return x
- parent = dstore.parent
- if parent:
- return self.get(parent, feature, default=default)
- return default
-
- def get_value(self, dstore=None):
- """Return the value of the variable in dstore."""
- dstore = dstore or self.rootDS
- return self.get(dstore, 'value', None)
-
- def get_lower(self, dstore=None):
- dstore = dstore or self.rootDS
- return self.get(dstore, 'lower')
-
- def get_upper(self, dstore=None):
- dstore = dstore or self.rootDS
- return self.get(dstore, 'upper')
-
- def get_lower_card(self, dstore=None):
- dstore = dstore or self.rootDS
- return self.get(dstore, 'lower_card', 0)
-
- def get_upper_card(self, dstore=None):
- dstore = dstore or self.rootDS
- return self.get(dstore, 'upper_card', MAX)
-
- def get_undecided(self, dstore=None):
- """Returns the set of values that may or may not be in the
variable."""
- dstore = dstore or self.rootDS
- return self.get_upper(dstore=dstore) -
self.get_lower(dstore=dstore)
-
- def det_update(self, dstore=None):
- if dstore:
- dstore.undetermined.remove(self)
- if self.essential:
- dstore.ess_undet.remove(self)
-
- def determined(self, dstore=None, constraint=None, verbosity=0):
- """Attempt to determine the variable, returning the value if this
is possible,
- False if it's not."""
- val = self.get_value(dstore=dstore)
- if val != None:
- return val
- def determined_help(value, dst, verb):
- value_card = len(value)
- lower_card = self.get_lower_card(dstore=dst)
- upper_card = self.get_upper_card(dstore=dst)
- if value_card < lower_card:
- s = "{} lowering lower card for {} to {}, less than
previous value {}"
- raise(VarError(s.format(constraint, self, value_card,
lower_card)))
- if value_card > upper_card:
- s = "{} raising upper card for {} to {}, greater than
previous value {}"
- raise(VarError(s.format(constraint, self, value_card,
upper_card)))
- self.set_value(value, dstore=dst)
- self.set_lower(value, dstore=dst)
- self.set_upper(value, dstore=dst)
- self.set_lower_card(value_card, dstore=dst)
- self.set_upper_card(value_card, dstore=dst)
- if verb > 1:
- print(' {} is determined at {}'.format(self, value))
- self.det_update(dstore=dst)
- return value
- lower = self.get_lower(dstore=dstore)
- upper = self.get_upper(dstore=dstore)
- if lower == None or upper == None:
- return False
- # If upper and lower bounds are equal, determine at either
- if lower == upper:
- return determined_help(lower, dstore, verbosity)
- # Combine cardinality and set bounds to determine
- # If the length of the upper bound is <= the lower cardinality
bound,
- # then make the upper bound the value
- if len(upper) <= self.get_lower_card(dstore=dstore):
- return determined_help(upper, dstore, verbosity)
- if len(lower) >= self.get_upper_card(dstore=dstore):
- return determined_help(lower, dstore, verbosity)
- return False
-
- ## Methods that can change the variable's set bounds or cardinality
bounds
-
- def determine(self, value, dstore=None, constraint=None):
- """Attempt to determine the variable as value, returning False it
can't be
- or if it's already determined."""
- if self.is_determined(dstore=dstore):
- return False
- value = value if isinstance(value, set) else {value}
- orig_upper = self.get_upper(dstore=dstore)
- orig_lower = self.get_lower(dstore=dstore)
- upper = self.get_upper(dstore=dstore)
- if not value.issubset(orig_upper):
- # Var can't be determined at this value
- return False
- if constraint:
- print(' {} determining {} as {}'.format(constraint, self,
value))
- val_card = len(value)
- self.set_lower(value, dstore=dstore)
- self.set_upper(value, dstore=dstore)
- self.set_value(value, dstore=dstore)
- self.set_lower_card(val_card, dstore=dstore)
- self.set_upper_card(val_card, dstore=dstore)
- if dstore and self in dstore.undetermined:
- self.det_update(dstore)
- if orig_upper != value or orig_lower != value:
- return True
- return False
-
- def strengthen_upper(self, upper2, dstore=None, constraint=None,
- reduce=False, det=False):
- """Strengthens the upper bound by intersecting it with upper2.
- If det is True, attempt to determine variable.
- """
- upper = self.get_upper(dstore=dstore)
- if not isinstance(upper, set):
- print("{}'s upper {} is not set".format(self, upper))
- if not upper.issubset(upper2):
- new_upper = upper.intersection(upper2)
- lower_card = self.get_lower_card(dstore=dstore)
- if new_upper == upper:
- return False
- lower = self.get_lower(dstore=dstore)
- if not lower.issubset(new_upper) and constraint:
- s = 'Warning: attempting to change upper bound of {} to
{}, which is not a superset of lower bound {}'
- print(s.format(self, new_upper, lower))
- if len(new_upper) < lower_card and constraint:
- s = 'Warning: attempting to change upper bound of {} to
{}, which is smaller than lower card {}'
- print(s.format(self, new_upper, lower_card))
- if constraint:
- s = ' {} strengthening upper bound of {} ({}) with {},
now {}'
- print(s.format(constraint, self, upper, upper2, new_upper))
- self.set_upper(new_upper, dstore=dstore)
- if det:
- if new_upper == lower:
-# print('Determining', self)
- val_len = len(lower)
- self.set_value(lower, dstore=dstore)
- self.set_lower_card(val_len, dstore=dstore)
- self.set_upper_card(val_len, dstore=dstore)
- if dstore and self in dstore.undetermined:
- self.det_update(dstore)
- elif len(new_upper) == lower_card:
- val_len = lower_card
- self.set_lower(new_upper, dstore=dstore)
- self.set_value(new_upper, dstore=dstore)
- self.set_upper_card(val_len, dstore=dstore)
- if dstore and self in dstore.undetermined:
- self.det_update(dstore)
- return True
- return False
-
- def discard_upper(self, value, dstore=None, constraint=None):
- """Discard set or element from upper bound."""
- upper = self.get_upper(dstore=dstore)
- value = value if isinstance(value, set) else {value}
- if value & upper:
- new_upper = upper - value
- new_upper_card = len(new_upper)
- lower = self.get_lower(dstore=dstore)
- if new_upper_card < len(lower) and constraint:
- s = 'Warning: attempting to discard {} from upper bound {}
of {}, making it smaller than lower bound {}'
- print(s.format(value, upper, self, lower))
- lower_card = self.get_lower_card(dstore=dstore)
- if new_upper_card < lower_card:
- s = 'Warning: attempting to discard {} from upper bound {}
of {}, making cardinality smaller than {}'
- print(s.format(value, upper, self, lower_card))
- # If value and upper overlap
- if constraint:
- print(' {} discarding {} from {}'.format(constraint,
value, self))
- self.set_upper(new_upper, dstore=dstore)
- self.set_upper_card(new_upper_card, dstore=dstore)
- return True
- return False
-
- def strengthen_lower(self, lower2, dstore=None, constraint=None,
det=False):
- """Strengthens the lower bound by unioning it with lower2."""
- lower = self.get_lower(dstore=dstore)
- if not lower.issuperset(lower2):
- new_lower = lower.union(lower2)
- upper = self.get_upper(dstore=dstore)
- upper_card = self.get_upper_card(dstore=dstore)
- if not new_lower.issubset(upper) and constraint:
- s = 'Warning: attempting to change lower bound of {} to
{}, which is not a subset of upper bound {}'
- print(s.format(self, new_lower, upper))
- if len(new_lower) > upper_card and constraint:
- s = 'Warning: attempting to change lower bound of {} to
{}, which is greater than upper card {}'
- print(s.format(self, new_lower, upper_card))
- if constraint:
- print(' {} strengthening lower bound of {} with
{}'.format(constraint, self, lower2))
- self.set_lower(new_lower, dstore=dstore)
- if det:
- if new_lower == upper and upper_card ==
self.lower_card(dstore=dstore):
- self.set_value(upper, dstore=dstore)
- if dstore and self in dstore.undetermined:
- self.det_update(dstore)
- return True
- return False
-
- def strengthen_lower_card(self, lower2, dstore=None, constraint=None,
det=False):
- """Raises the lower bound on the cardinality of the set."""
- if lower2 > self.get_lower_card(dstore=dstore):
- if constraint:
- print(' {} raising lower cardinality bound of {} to
{}'.format(constraint, self, lower2))
- self.set_lower_card(lower2, dstore=dstore)
- if det:
- upper_card = self.get_upper_card(dstore=dstore)
- if lower2 == upper_card:
- upper = self.get_upper(dstore=dstore)
- if len(upper) == upper_card:
- # Determine
- self.set_lower(upper, dstore=dstore)
- self.set_value(upper, dstore=dstore)
- if dstore and self in dstore.undetermined:
- self.det_update(dstore)
- return True
- return False
-
- def strengthen_upper_card(self, upper2, dstore=None, constraint=None,
det=False):
- """Lowers the upper bound on the cardinality of the set."""
- if upper2 < self.get_upper_card(dstore=dstore):
- if constraint:
- print(' {} lowering upper cardinality bound of {} to
{}'.format(constraint, self, upper2))
- self.set_upper_card(upper2, dstore=dstore)
- if det:
- lower_card = self.get_lower_card(dstore=dstore)
- if upper2 == lower_card:
- lower = self.get_lower(dstore=dstore)
- if len(lower) == lower_card:
- # Determine
- self.set_upper(lower, dstore=dstore)
- self.set_value(lower, dstore=dstore)
- if dstore and self in dstore.undetermined:
- self.det_update(dstore)
- return True
- return False
-
- ## Printing
-
- @staticmethod
- def string_range(lower, upper):
- s = '{'
- for i,v in enumerate(upper):
- if i != 0:
- s += ','
- if v not in lower:
- s += '({})'.format(v)
- else:
- s += '{}'.format(v)
- return s + '}'
-
- def pretty_string(self, dstore=None, spaces=0, end='\n'):
- return '{0}${1}:{2}|{3},{4}|'.format(spaces*' ',
- self.name,
-
Var.string_range(self.get_lower(dstore=dstore),
-
self.get_upper(dstore=dstore)),
-
self.get_lower_card(dstore=dstore),
-
self.get_upper_card(dstore=dstore))
-
- def pprint(self, dstore=None, spaces=0, end='\n'):
- print(self.pretty_string(dstore=dstore, spaces=spaces, end=end))
-
-class IVar(Var):
-
- def __init__(self, name, domain=None,
- problem=None, dstores=None, rootDS=None,
- # Vars with low weights are "peripheral".
- weight=1, essential=True):
- Var.__init__(self, name,
- lower_domain=set(), upper_domain=domain,
- lower_card=1, upper_card=1,
- problem=problem, dstores=dstores, rootDS=rootDS,
- weight=weight, essential=essential)
-
- def __repr__(self):
- return '#{}'.format(self.name)
-
- def equals(self, var, dstore=None, pattern=False):
- """Does this variable have the same value as var in dstore?
- var could be an IVar."""
- value = self.get_value(dstore=dstore)
- var_val = var.get_value(dstore=dstore)
- is_ivar = isinstance(var, IVar)
- if value != None and var_val != None:
-# if is_ivar:
-# print('var {}, value {}'.format(var, var_val))
-# var_val = {var_val} if var_val else set()
- if value == var_val:
- return True
- return False
-
- def determined(self, dstore=None, constraint=None, verbosity=0):
- """Attempt to determine the variable, returning the value if this
is possible,
- False if it's not."""
- val = self.get_value(dstore=dstore)
- if val != None:
- return val
- upper = self.get_upper(dstore=dstore)
- if len(upper) == 1:
- self.set_value(upper, dstore=dstore)
- self.set_lower(upper, dstore=dstore)
- if verbosity > 1:
- print(' {} is determined at {}'.format(self, upper))
- if dstore:
- self.det_update(dstore)
- return upper
- return False
-
- def pretty_string(self, dstore=None, spaces=0, end='\n'):
- return '{0}#{1}:{2}'.format(spaces*' ',
- self.name,
- self.get_upper(dstore=dstore))
-
-### Variables that are pre-determined.
-
-class DetVar(Var):
- """Pre-determined variable. If DStore is not specified in constructor,
- the variable is determined in all DStores. Should not be modified."""
-
- def __init__(self, name, value, dstore=None):
- Var.__init__(self, name, rootDS=dstore)
- self.dstore = dstore
- if self.dstore:
- self.determine(value, dstore=dstore)
- else:
- self.value = value
- self.lower_domain = value
- self.upper_domain = value
- self.set_cards(value)
-
- def __repr__(self):
- return '$!{}:{}'.format(self.name, self.value)
-
- def set_cards(self, value):
- self.init_upper_card = len(value)
- self.init_lower_card = len(value)
-
- def init_values(self, dstore=None):
- # Don't do anything
- pass
-
- def set(self, dstore, feature, value):
- """Override set in Variable to prevent changes."""
- # This message should print out under some verbosity conditions.
- s = 'Warning: attempting to change pre-determined variable {},
feature: {}, value: {}, orig value: {}'
- print(s.format(self, feature, value, self.get(dstore, feature)))
- return False
-
- def is_determined(self, dstore=None):
- return True
-
- def pretty_string(self, dstore=None, spaces=0, end='\n'):
- return '{0}$!{1}:{2}'.format(spaces*' ', self.name,
self.get(dstore, 'value'))
-
- def cost(self, dstore=None):
- return 0
-
- def determined(self, dstore=None, verbosity=0, constraint=None):
- if self.dstore:
- return Var.determined(self, dstore=dstore,
verbosity=verbosity, constraint=constraint)
- return self.value
-
- def get(self, dstore, feature, default=None):
- if self.dstore:
- return Var.get(self, dstore, feature, default=default)
- if feature in {'value', 'lower', 'upper'}:
- return self.value
- if feature in {'lower_card', 'upper_card'}:
- return len(self.value)
-
-class DetIVar(DetVar, IVar):
-
- def __init__(self, name='', value=0, dstore=None):
- IVar.__init__(self, name, rootDS=dstore)
- # value could be the empty set
- if not isinstance(value, set):
- value = {value}
- DetVar.__init__(self, name, value, dstore)
- self.init_domain = value
- self.default_value = value
-
- def __repr__(self):
- return '#!{}:{}'.format(self.name, list(self.value)[0])
-
- def init_values(self, dstore=None):
- # Don't do anything
- pass
-
- def set_cards(self, value):
- self.init_upper_card = 1
- self.init_lower_card = 1
-
- def pretty_string(self, dstore=None, spaces=0, end='\n'):
- return '{0}#!{1}:{2}'.format(spaces*' ', self.name,
self.get(dstore, 'value'))
-
- def get(self, dstore, feature, default=None):
- if self.dstore:
- return IVar.get(self, dstore, feature, default=default)
- if feature == 'value':
- return self.value
- if feature in ('dom', 'upper', 'lower'):
- if isinstance(self.value, set):
- return self.value
- else:
- return {self.value}
- if feature in ('lower_card', 'upper_card'):
- return 1
-
-class VarError(Exception):
- '''Class for errors encountered when attempting to execute an event on
a variable.'''
-
- def __init__(self, value):
- self.value = value
-
- def __str__(self):
- return repr(self.value)
-
-# Constant variables, determined in all DStores
-EMPTY = DetVar("empty", set())
=======================================
--- /hiiktuu.py Fri May 2 22:53:47 2014 UTC
+++ /hiiktuu.py Tue May 6 07:09:41 2014 UTC
@@ -36,6 +36,75 @@
#import cProfile
#import pstats

+def agr_test1():
+ # This should constrain seq vars seq0 and seq2 to be {2} and {3}
+ sel = l3lite.DetVar('sel', {(0, 1, ('sn', 'sn'), ('sp', 'sp')),
+ (2, 3, ('tam', 'tns'))})
+ seq = [l3lite.Var('seq0', set(), {1, 2}, 1, 1), l3lite.DetVar('seq1',
{0}),
+ l3lite.Var('seq2', set(), {3, 5}, 1, 1), l3lite.DetVar('seq3',
{4})]
+ feat = [l3lite.DetLVar('f0', [l3lite.Features({'sn': 0, 'sp': 3})]),
+ l3lite.DetLVar('f1', [l3lite.Features({'sn': 1})]),
+ l3lite.DetLVar('f2', [l3lite.Features({'sn': 0, 'sp': 3, 'sg':
1})]),
+ l3lite.DetLVar('f3', [l3lite.Features({'tam': 'ps'})]),
+ l3lite.DetLVar('f4', [l3lite.Features({'tns': 'ps'})]),
+ l3lite.DetLVar('f5', [l3lite.Features({'tam': 'pr'})])]
+ agr = l3lite.AgrSelection(feat, sel, seq)
+ return agr
+
+def agr_test2():
+ # This should constrain feat var f0 to [{sn: 1, sp: 3}]
+ sel = l3lite.DetVar('sel', {(0, 1, ('sn', 'sn'), ('sp', 'sp')),
+ (2, 3, ('tam', 'tns'))})
+ seq = [l3lite.DetVar('seq0', {2}), l3lite.DetVar('seq1', {0}),
+ l3lite.Var('seq2', set(), {3, 5}, 1, 1), l3lite.DetVar('seq3',
{4})]
+ feat = [l3lite.LVar('f0', [], [l3lite.Features({'sn': 0, 'sp': 3}),
+ l3lite.Features({'sn': 0, 'sp': 2}),
+ l3lite.Features({'sn': 1, 'sp': 3})],
+ 1, 1),
+ l3lite.DetLVar('f1', [l3lite.Features({'sn': 1})]),
+ l3lite.DetLVar('f2', [l3lite.Features({'sn': 0, 'sp': 3, 'sg':
1})]),
+ l3lite.DetLVar('f3', [l3lite.Features({'tam': 'ps'})]),
+ l3lite.DetLVar('f4', [l3lite.Features({'tns': 'ps'})]),
+ l3lite.DetLVar('f5', [l3lite.Features({'tam': 'pr'})])]
+ agr = l3lite.AgrSelection(feat, sel, seq)
+ return agr
+
+def agr_test3():
+ # This should fail.
+ sel = l3lite.DetVar('sel', {(0, 1, ('sn', 'sn'), ('sp', 'sp')),
+ (2, 3, ('tam', 'tns'))})
+ seq = [l3lite.DetVar('seq0', {2}), l3lite.DetVar('seq1', {0}),
+ l3lite.Var('seq2', set(), {3, 5}, 1, 1), l3lite.DetVar('seq3',
{4})]
+ feat = [l3lite.LVar('f0', [], [l3lite.Features({'sn': 0, 'sp': 3}),
+ l3lite.Features({'sn': 0, 'sp': 2}),
+ l3lite.Features({'sn': 1, 'sp': 3})],
+ 1, 1),
+ l3lite.DetLVar('f1', [l3lite.Features({'sn': 1})]),
+ l3lite.DetLVar('f2', [l3lite.Features({'sn': 0, 'sp': 1, 'sg':
1})]),
+ l3lite.DetLVar('f3', [l3lite.Features({'tam': 'ps'})]),
+ l3lite.DetLVar('f4', [l3lite.Features({'tns': 'ps'})]),
+ l3lite.DetLVar('f5', [l3lite.Features({'tam': 'pr'})])]
+ agr = l3lite.AgrSelection(feat, sel, seq)
+ return agr
+
+def agr_test4():
+ # This should be entailed.
+ sel = l3lite.DetVar('sel', {(0, 1, ('sn', 'sn'), ('sp', 'sp')),
+ (2, 3, ('tam', 'tns'))})
+ seq = [l3lite.DetVar('seq0', {2}), l3lite.DetVar('seq1', {0}),
+ l3lite.Var('seq2', set(), {3, 5}, 1, 1), l3lite.DetVar('seq3',
{4})]
+ feat = [l3lite.LVar('f0', [], [l3lite.Features({'sn': 0, 'sp': 1}),
+ l3lite.Features({'sn': 0, 'sp':
1, 'sg': 1}),
+ l3lite.Features({'sn': 0, 'sg': 3})],
+ 1, 1),
+ l3lite.DetLVar('f1', [l3lite.Features({'sn': 1})]),
+ l3lite.DetLVar('f2', [l3lite.Features({'sn': 0, 'sp': 1, 'sg':
1})]),
+ l3lite.DetLVar('f3', [l3lite.Features({'tam': 'ps'})]),
+ l3lite.DetLVar('f4', [l3lite.Features({'tns': 'ps'})]),
+ l3lite.DetLVar('f5', [l3lite.Features({'tam': 'ps', 'sn':
2})])]
+ agr = l3lite.AgrSelection(feat, sel, seq)
+ return agr
+
def ate_fish(verbosity=0):
"""
Amh->Orm
@@ -48,10 +117,10 @@
s.do(verbosity=verbosity)
return s

-def never_eaten_fish(verbosity=0):
+def never_eaten_fish(trans=True, verbosity=0):
"""
- Amh->Orm
- አሳ በልቶ አያውቅም (he's never eaten fish) -> qurxummii nyaate hin beeku.
+ Amh አሳ በልቶ አያውቅም 'he's never eaten fish'
+ Either parse (trans=False) or translate -> Orm: qurxummii nyaate hin
beeku.
Illustrates
(1) source-target feature agreement
(2) source-target group translation mismatch in word count
@@ -59,7 +128,26 @@
amh, orm = l3lite.Language.load('amh', 'orm')
s = l3lite.Sentence(raw="አሳ በልቶ አያውቅም", language=amh, target=orm,
verbosity=verbosity)
- s.do(verbosity=verbosity)
+ if trans:
+ s.do(verbosity=verbosity)
+ else:
+ s.parse(verbosity=verbosity)
+ return s
+
+def never_eaten_fish_ungr(trans=True, verbosity=0):
+ """
+ Amh አሳ በልተው አያውቅም 'he's never eaten fish' (ungrammatical because the
+ በልተው is 3rd person *plural* so it doesn't agree with አያውቅም).
+ Like the last case except since this is ungrammatical, no solution is
+ found that covers all of the words.
+ """
+ amh, orm = l3lite.Language.load('amh', 'orm')
+ s = l3lite.Sentence(raw="አሳ በልተው አያውቅም", language=amh, target=orm,
+ verbosity=verbosity)
+ if trans:
+ s.do(verbosity=verbosity)
+ else:
+ s.parse(verbosity=verbosity)
return s

def cantar_las_cuarenta(trans=True, verbosity=0):
Reply all
Reply to author
Forward
0 new messages