Modified:
/branches/thefuture-modules/thebeast-apps/src/main/scala/org/riedelcastro/thebeast/apps/DependencyParsing.scala
/branches/thefuture-modules/thebeast-core/src/main/scala/org/riedelcastro/thebeast/env/vectors/Vector.scala
=======================================
---
/branches/thefuture-modules/thebeast-apps/src/main/scala/org/riedelcastro/thebeast/apps/DependencyParsing.scala
Thu Mar 25 22:50:25 2010
+++
/branches/thefuture-modules/thebeast-apps/src/main/scala/org/riedelcastro/thebeast/apps/DependencyParsing.scala
Fri Mar 26 20:22:47 2010
@@ -32,11 +32,11 @@
}
def asTokenProperties[T](root:T, seq:Seq[T]):Seq[(Int,T)] = {
- Seq((0,root)) ++ (for (i <- 0 until seq.size) yield i -> seq(i))
+ Seq((0,root)) ++ (for (i <- 0 until seq.size) yield i + 1 -> seq(i))
}
- def loadCoNLLFile(file: String): Seq[Env] = {
+ def loadCoNLLFile(file: String, from:Int, to:Int): Seq[Env] = {
val result = new ArrayBuffer[Env]
val rows = new ArrayBuffer[Array[String]]
for (line <- Source.fromFile(file).getLines.map(_.trim)) {
@@ -46,6 +46,8 @@
env.atoms(word) ++= asTokenProperties(ROOT,rows.map(row => row(1)))
env.atoms(pos) ++= asTokenProperties(ROOT,rows.map(row => row(3)))
env.atoms(link) ++= rows.map(row => row(6).toInt->row(0).toInt)
+ result += env
+ if (result.size == to) return result.drop(from)
} else {
rows += line.split("\\s+")
}
@@ -140,3 +142,37 @@
}
}
+
+object NaiveDependencyParsingApp {
+
+ import DependencyParsing._
+
+ def main(args:Array[String]) = {
+ val trainData = loadCoNLLFile(args(0),0,20)
+ println(trainData(0)(word))
+ println(trainData(0)(pos))
+ println(trainData(0)(link))
+
+ val posPairProbs = trainNaively(trainData)
+ println(posPairProbs)
+
+ val posPair = vectorSum(Tokens, Tokens, Tags, Tags) {
+ (h, m, h_pos, m_pos) =>
+ $(pos(h, h_pos) && pos(m, m_pos) && link(h, m)) * unit("Pos", h_pos, m_pos)
+ }
+
+ val treeConstraint = SpanningTreeConstraint(link, token, 0, LessThan(Tokens))
+
+ val theta = VectorVar("theta")
+ //val linearModel = ((wordPair + posPair + bias) dot weightVar) + treeConstraint
+ val linearModel = (posPair dot theta)
+ val probModel = normalize(exp(linearModel) * ptree(link, token, 0, LessThan(Tokens)))
+
+ val weights = new Vector
+ for (pair <- posPairProbs) weights("Pos",pair._1._1,pair._1._2) = pair._2
+
+ println(weights)
+
+ }
+
+}
=======================================
---
/branches/thefuture-modules/thebeast-core/src/main/scala/org/riedelcastro/thebeast/env/vectors/Vector.scala
Sun Mar 21 14:06:34 2010
+++
/branches/thefuture-modules/thebeast-core/src/main/scala/org/riedelcastro/thebeast/env/vectors/Vector.scala
Fri Mar 26 20:22:47 2010
@@ -77,7 +77,7 @@
override def toString =
store.elements.foldLeft("") {
(s, e) =>
- s + e._1.asInstanceOf[Collection[_]].mkString(",") + "\t" + e._2.toString + "\n"
+ s + e._1.asInstanceOf[Collection[_]].map("%-6s".format(_)).mkString(" ") + "\t" + e._2.toString + "\n"
}
}