[clml] r128 committed - update

0 views

Skip to first unread message

cl...@googlecode.com

unread,

Jun 29, 2011, 3:51:21 PM6/29/11

to cl...@googlegroups.com

Revision: 128
Author: sbo...@isetsu.net
Date: Wed Jun 29 12:51:08 2011
Log: update
http://code.google.com/p/clml/source/detail?r=128

Added:
/trunk/pyphon/src/twotierlangs.py
Modified:
/trunk/pyphon/src/pyphon_find2tlanguages.py
/trunk/pyphon/src/pyphon_recurse.py
/trunk/pyphon/src/pyphon_twotier.py

=======================================
--- /dev/null
+++ /trunk/pyphon/src/twotierlangs.py Wed Jun 29 12:51:08 2011
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+
+#Give this the output filename from pyphon_twotier -- it'll give you ERC-set pairs which together define an Input-intermediate-output relationship consistent with the provided observations.
+
+from pyphon import *
+from permutationdistance import getrankings, set_dif
+
def print_languages(languages, log, i):
    """Report each language on stdout and append a summary of it to *log*.

    Args:
        languages: iterable of languages. Each language is indexed as
            ``[cyclic_ercs, postlex_ercs, members]``; every member must
            expose at least fields 0, 1 and 2.
        log: writable file-like object. It receives a ``Language <i>``
            header, the string form of both ERC sets (one per line) and one
            space-separated line per member.
        i: label printed and logged for every language in ``languages``
            (this function does not advance it).
    """
    for language in languages:
        cyclic_ercs = language[0]
        postlex_ercs = language[1]

        # The single-argument print(...) form emits exactly the text the old
        # Python 2 print statements produced, and also parses on Python 3.
        print("Language: %s" % (i,))
        log.write("Language " + str(i) + "\n")

        print("Cyclic ERCs %s" % (cyclic_ercs,))
        log.write(str(cyclic_ercs))
        log.write("\n")
        # NOTE(review): getrankings is imported from permutationdistance;
        # the 10 is presumably the number of sample rankings -- confirm.
        print("Sample Cyclic Rankings %s" % (getrankings(cyclic_ercs, 10),))

        log.write(str(postlex_ercs))
        log.write("\n")
        print("Postlex ERCs %s" % (postlex_ercs,))
        print("Sample Postlex Rankings %s" % (getrankings(postlex_ercs, 10),))

        for member in language[2]:
            fields = "%s %s %s" % (member[0], member[1], member[2])
            print(fields)
            log.write(fields + "\n")
+
def compatible_languages(cohort, languages):
    """Combine every candidate language with every member of *cohort*.

    A language is a list ``[first_ercs, second_ercs, members]``. Each member
    is a sequence whose field 3 and field 4 hold "_"-joined ERC strings;
    field 3 feeds the first ERC set and field 4 the second.

    Args:
        cohort: members to combine with the languages built so far.
        languages: languages built so far. When empty, one language is
            seeded per cohort member.

    Returns:
        A new list of languages. When extending existing languages, a
        combination is kept only if ``RNF.consistent`` accepts both of its
        ERC sets. Only the first language for each distinct pair of ERC sets
        is returned.
    """
    candidates = []
    if not languages:
        # Seeding: ERC lists are taken as-is (no filtering, no consistency
        # check), exactly as the original did.
        for member in cohort:
            candidates.append(
                [member[3].split("_"), member[4].split("_"), [member]])
    else:
        for language in languages:
            for member in cohort:
                # Keep only ERCs containing an "l". Building a filtered
                # list replaces the original remove()-while-iterating loops,
                # which skipped the element following every removal and so
                # let some "l"-less ERCs through.
                # NOTE(review): the original also computed RNF.RNF(...) of
                # both concatenations and overwrote the results before use;
                # that dead computation is dropped here.
                es1 = [erc for erc in language[0] + member[3].split("_")
                       if "l" in erc]
                es2 = [erc for erc in language[1] + member[4].split("_")
                       if "l" in erc]

                if RNF.consistent(es1) and RNF.consistent(es2):
                    candidates.append([es1, es2, language[2] + [member]])

    # Drop languages whose two ERC sets duplicate an earlier candidate's,
    # keeping the first occurrence (same result as the original pairwise scan).
    seen = set()
    unique = []
    for candidate in candidates:
        key = (tuple(candidate[0]), tuple(candidate[1]))
        if key in seen:
            continue
        seen.add(key)
        unique.append(candidate)

    return unique
+
def find2tlangs(infilename):
    """Read I-I-O mappings from a CSV file and report the languages found.

    Rows of ``<infilename>.csv`` are split on "," and grouped into cohorts by
    their first field. The cohorts are folded through
    ``compatible_languages`` and every surviving language is passed to
    ``print_languages``, which logs to ``<infilename>_languages.csv``.

    Args:
        infilename: path of the input CSV, with or without a trailing
            ``.csv`` extension.

    Returns:
        The list of languages found (empty when none survive or the file
        holds no data rows). The original returned nothing; callers that
        ignore the result are unaffected.
    """
    # Only strip a real ".csv" suffix; the old substring test chopped four
    # characters whenever ".csv" appeared anywhere in the path.
    if infilename.endswith(".csv"):
        infilename = infilename[:-4]

    seqs = []
    with open(infilename + ".csv", 'r') as infile:
        for line in infile:
            if "input form" in line:  # header row
                continue
            # Strip only the line terminator; slicing off the last character
            # lost data when the final line had no trailing newline.
            line = line.rstrip("\r\n")
            if not line:
                continue
            seqs.append(line.split(","))

    # Sorting makes rows that share an input form adjacent.
    seqs.sort()

    cohorts = []
    for seq in seqs:
        if cohorts and cohorts[-1][0][0] == seq[0]:
            cohorts[-1].append(seq)
        else:
            cohorts.append([seq])

    languages = []
    for cohort in cohorts:
        languages = compatible_languages(cohort, languages)
        print("Adding cohort with %s languages: %s"
              % (len(languages), cohort[0][0]))
        if not languages:
            break

    # Also reached for an empty input, which previously raised IndexError
    # on the empty cohort.
    if not languages:
        print("No languages found. Sorry!")

    with open(infilename + "_languages.csv", 'w') as log:
        for i, language in enumerate(languages, 1):
            print_languages([language], log, i)

    return languages
+
=======================================
--- /trunk/pyphon/src/pyphon_find2tlanguages.py Tue Jun 28 16:12:00 2011
+++ /trunk/pyphon/src/pyphon_find2tlanguages.py Wed Jun 29 12:51:08 2011
@@ -5,9 +5,6 @@
import sys
from pyphon import *
from permutationdistance import getrankings, set_dif
-#from temperctools import cover_dist
-from permutationdistance import getrankings
-

infilename = sys.argv[-1]
if ".csv" in infilename:
@@ -18,6 +15,8 @@

infile = open(infilename + ".csv", 'r')

+
+
seqs = []
for line in infile:
if ("input form" in line):
@@ -30,6 +29,8 @@
cohort = []
#def next_input(erc, combos):

+seqs.sort()
+
prev = ""
for i in range(len(seqs)):
if prev == "" or seqs[i][0] == prev:
=======================================
--- /trunk/pyphon/src/pyphon_recurse.py Tue Jun 28 16:12:00 2011
+++ /trunk/pyphon/src/pyphon_recurse.py Wed Jun 29 12:51:08 2011
@@ -68,6 +68,10 @@

pyphon_maketableaux.main(model, infilename+"_ri", "ot", "temptab")

+ cmd = "cat temptab.csv"
+ os.system(cmd)
+
+
cache = []

for word in wordsrem:
@@ -175,6 +179,7 @@
inputs = []

for line in infile:
+ print line
line = line.rstrip("\n")

# Separate the affixes
=======================================
--- /trunk/pyphon/src/pyphon_twotier.py Tue Jun 28 16:12:00 2011
+++ /trunk/pyphon/src/pyphon_twotier.py Wed Jun 29 12:51:08 2011
@@ -1,7 +1,12 @@
#!/usr/bin/env python
import sys, os, re, string
-sys.path[0:0] = 'src' # puts the /foo directory at the start of your path
-from pyphon import *
+sys.path[0:0] = 'src'
+import pyphon
+import twotierlangs
+
+
+#Performs two tier evaluation: If an input file contains comma-separated input-output pairs, then this finds intermediate forms and ERCs/rankings that can allow two-tier generation of those pairs. If only inputs are provided, a complete typology of outputs is provided.
+#Takes arguments: input filename; model filename

infilename = sys.argv[-1]
if ".csv" in infilename:
@@ -17,135 +22,128 @@
con = []
failed = []

-for line in infile:
+for line in infile: #read the input
if (line[0] == "#"):
continue
line = line[:-1]
pair = line.rsplit(", ") #build pairs of IO mappings
if len(pair)!= 2:
continue
+ pair.extend([[],[],[],False, re.sub("(\\\#)|(\\\.)", "", pair[1])])
#checked, intermediates, interm ercs, locsat, target
ios.append(pair)
-
-for pair in ios: #For each IO pair
- checked = []
- intermediates = []
- intermediate_ercs = []
- locally_satisfied = False
- target = re.sub("(\\\#)|(\\\.)", "", pair[1])
- print "Analyzing pair", pair[0], target
-
-
- # Produce the stem input
- loutput = open(model+"_temp.csv",'w')
- loutput.write(pair[0]+'\n')
- loutput.close()
-
- cmd = "pyphon_recurse.py " + model + " " + model + "_temp"
- print cmd
- os.system(cmd)
-
- infile = open(model + "_" + model + "_temp_recursive_typology.csv", 'r')
-
- loutput = open(model+"_temp.csv",'w')
-
- for line in infile: #take these as inputs for MakeTab
- line = line[:-1]
- if (line[0:5] == "Input"):
- continue
- print line
- sectioned = line.rsplit(', ')
-
- if sectioned[0] in intermediates:
- print 'hrnh?'
-
- intermediates.append(sectioned[0])
- intermediate_ercs.append(sectioned[1])
-
- #print "Twotier got intermmediate form", sectioned[0], sectioned[1]
-
- #produce input line
- loutput.write(sectioned[0]+'\n')
-
- loutput.close()
- infile.close()
-
- cmd = "pyphon_maketableaux.py " + model + " " + model + "_temp.csv OT"
- print cmd
- os.system(cmd)
-
- #Read tableau
- infile = open( model + "-" + model + "_temp-OT.csv", 'r')
-
- for line in infile: #take these as inputs for MakeTab
- line = line[:-1]
- if (con == [] and ",,," in line):
- sectioned = line.rsplit(',')
- con = sectioned[3:]
+
+loutput = open("twotier_temp.csv",'w')
+for pair in ios:
+ loutput.write(pair[0]+'\n') #write inputs for the first tier
+loutput.close()
+
+cmd = "pyphon_recurse.py " + model + " " + "twotier_temp" #run the first
tier
+print cmd
+os.system(cmd)
+
+
+#scan the resulting typology
+infile = open(model + "_twotier_temp_recursive_typology.csv", 'r')
+
+loutput = open("twotier_temp.csv",'w')
+
+for line in infile: #take these as inputs for MakeTab
+ line = line[:-1]
+ if (line[0] == ","):
+ continue
+ print line
+ sectioned = line.rsplit(', ')
+
+ for pair in ios:
+ if sectioned[0] != pair[0]:
continue
- if(line[0]==","):
- continue
-
- print "Twotier Read CON:", con
-
- infile.close()
-
- cmd = "pyphon_generate.py " + model + "-" + model + "_temp-OT "
+ "tiertwo_temp.csv OT"
- print cmd
- os.system(cmd)
-
- cmd = "cat " + "tiertwo_temp.csv"
- print cmd
- os.system(cmd)
-
- infile = open( "tiertwo_temp.csv", 'r')
-
- for line in infile: #scan the intermediate-final form pairs
- line = line[:-1]
- if(line[0]=="/"):
- seg = line.rsplit(",")
- n = re.search('(?<=/).*(?=/)', seg[0])
- input = n.group(0)
- n = re.search('(?<=\[).*(?=\])', seg[1])
- output = n.group(0)
-
- if len(seg)>len(con)+2:
- ercs = ",".join(seg[(len(con)+1):])
- localercs=[]
- for n in re.finditer('[wel]+', ercs):
- localercs.append(n.group(0))
- localerc = "_".join(localercs)
- else:
- localerc = 'e'*len(con)
-
-
- if target==output and [input, output] not in checked:
- checked.append([input, output])
- for i in range(len(intermediates)):
- if input == intermediates[i]:
- initialerc = intermediate_ercs[i]
+
+ pair[3].append(sectioned[1])
+ pair[4].append(sectioned[2])
+
+ #produce input line
+ loutput.write(sectioned[1]+'\n')
+
+loutput.close()
+infile.close()
+
+cmd = "pyphon_maketableaux.py " + model + " twotier_temp.csv OT
twotier_tab_temp.csv"
+print cmd
+os.system(cmd)
+
+#Read tableau
+infile = open("twotier_tab_temp.csv", 'r')
+
+for line in infile: #take these as inputs for MakeTab
+ line = line[:-1]
+ if (con == [] and ",,," in line):
+ sectioned = line.rsplit(',')
+ con = sectioned[3:]
+ continue
+ if(line[0]==","):
+ continue
+
+print "Twotier Read CON:", con
+
+infile.close()
+
+cmd = "pyphon_generate.py twotier_tab_temp twotier_temp OT"
+print cmd
+os.system(cmd)
+
+infile = open("twotier_temp.csv", 'r')
+
+for line in infile: #scan the intermediate-final form pairs
+ line = line[:-1]
+ if(line[0]=="/"):
+ seg = line.rsplit(",")
+ n = re.search('(?<=/).*(?=/)', seg[0])
+ input = n.group(0)
+ n = re.search('(?<=\[).*(?=\])', seg[1])
+ output = re.sub("(#)|(\.)", "", n.group(0))
+
+
+
+ if len(seg)>len(con)+2:
+ ercs = ",".join(seg[(len(con)+1):])
+ localercs=[]
+ for n in re.finditer('[wel]+', ercs):
+ localercs.append(n.group(0))
+ localerc = "_".join(localercs)
+ else:
+ localerc = 'e'*len(con)
+
+ for pair in ios:
+ if pair[6]==output and [input, output] not in pair[2]:
+ pair[2].append([input, output])
+ found = False
+ for i in range(len(pair[3])):
+ if input == pair[3][i]:
+ initialerc = pair[4][i]
if initialerc == '0':
initialerc = 'e'*len(con)
+ found = True
break
-
+ if not found:
+ continue
+
#Found an I-I-O mapping!
- locally_satisfied = True
- #print "Found output!",pair[0], input, output,initialerc,localerc,"\n"
- print line
+ pair[5] = True
+ print "Found",pair[0], input, output,initialerc,localerc,"\n"

log.write(pair[0]+","+input+","+output+","+initialerc+","+localerc+'\n')
- combinations.append([pair[0], target, output, initialerc, localerc])
- initialerc = re.sub("_", ", ", pair[1])
- continue
-
- if not locally_satisfied:
+ combinations.append([pair[0], pair[6], output, initialerc, localerc])
+
+for pair in ios:
+ if not pair[5]:
failed.append(pair)
-
- infile.close()
+
+infile.close()

log.close()

if failed==[]:
- cmd = "pyphon_find2tlanguages.py "+model+"_twotier.csv"
- print cmd
- os.system(cmd)
+ twotierlangs.find2tlangs(model+"_twotier.csv")
+ print "I-I-O sets written to "+model+"_twotier.csv"
+ print "Language definitions written to "+model+"_twotier_languages.csv"
else:
print "Some IO pairs could not be generated:", failed

Reply all

Reply to author

Forward

0 new messages