[okapi] 4 new revisions pushed by yves.sav...@gmail.com on 2012-03-03 14:51 GMT

0 views
Skip to first unread message

ok...@googlecode.com

unread,
Mar 3, 2012, 9:52:56 AM3/3/12
to okapi-...@googlegroups.com
4 new revisions:

Revision: 4531690fbc0c
Author: Yves <yves.s...@gmail.com>
Date: Thu Mar 1 17:12:45 2012
Log: temporary drp project
http://code.google.com/p/okapi/source/detail?r=4531690fbc0c

Revision: 3082a225a2cf
Author: Yves <yves.s...@gmail.com>
Date: Fri Mar 2 14:13:53 2012
Log: Merge branch 'master' of https://code.google.com/p/okapi
http://code.google.com/p/okapi/source/detail?r=3082a225a2cf

Revision: 8b7c7455e9bc
Author: Yves <yves.s...@gmail.com>
Date: Sat Mar 3 05:06:04 2012
Log: Merge branch 'master' of https://code.google.com/p/okapi
http://code.google.com/p/okapi/source/detail?r=8b7c7455e9bc

Revision: 03ad4ea45858
Author: Yves <yves.s...@gmail.com>
Date: Sat Mar 3 06:51:27 2012
Log: Added no-query threshold option in Leveraging step and IQuery.
http://code.google.com/p/okapi/source/detail?r=03ad4ea45858

==============================================================================
Revision: 4531690fbc0c
Author: Yves <yves.s...@gmail.com>
Date: Thu Mar 1 17:12:45 2012
Log: temporary drp project

http://code.google.com/p/okapi/source/detail?r=4531690fbc0c

Modified:
/okapi/filters/drupal/src/test/resources/test.drp

=======================================
--- /okapi/filters/drupal/src/test/resources/test.drp Wed Feb 29 15:02:22 2012
+++ /okapi/filters/drupal/src/test/resources/test.drp Thu Mar 1 17:12:45 2012
@@ -3,7 +3,10 @@
password=#BeNcStreXNhdm91cmVs
sourceLocale=en
targetLocale=en
-9 yes
+neutralLikeSource=yes
+12 no
+11 yes
+10 no
+9 no
8 no
3 no
-1 yes

==============================================================================
Revision: 3082a225a2cf
Author: Yves <yves.s...@gmail.com>
Date: Fri Mar 2 14:13:53 2012
Log: Merge branch 'master' of https://code.google.com/p/okapi

http://code.google.com/p/okapi/source/detail?r=3082a225a2cf

==============================================================================
Revision: 8b7c7455e9bc
Author: Yves <yves.s...@gmail.com>
Date: Sat Mar 3 05:06:04 2012
Log: Merge branch 'master' of https://code.google.com/p/okapi

http://code.google.com/p/okapi/source/detail?r=8b7c7455e9bc

==============================================================================
Revision: 03ad4ea45858
Author: Yves <yves.s...@gmail.com>
Date: Sat Mar 3 06:51:27 2012
Log: Added no-query threshold option in Leveraging step and IQuery.

http://code.google.com/p/okapi/source/detail?r=03ad4ea45858

Modified:
/deployment/shared/changes.html

/okapi-ui/swt/steps/leveraging-ui/src/main/java/net/sf/okapi/steps/leveraging/ui/ParametersEditor.java
/okapi/core/src/main/java/net/sf/okapi/common/query/IQuery.java

/okapi/libraries/lib-translation/src/main/java/net/sf/okapi/lib/translation/BaseConnector.java

/okapi/libraries/lib-translation/src/main/java/net/sf/okapi/lib/translation/QueryManager.java

/okapi/steps/leveraging/src/main/java/net/sf/okapi/steps/leveraging/LeveragingStep.java

/okapi/steps/leveraging/src/main/java/net/sf/okapi/steps/leveraging/Parameters.java

=======================================
--- /deployment/shared/changes.html Thu Mar 1 14:47:54 2012
+++ /deployment/shared/changes.html Sat Mar 3 06:51:27 2012
@@ -80,6 +80,11 @@
<li>Added 3 options for treating already segmented text.</li>
</ul>
</li>
+ <li>Leveraging Step:<ul>
+ <li>Added the option to not query if the entry has already a candidate
+ equals or above a given score.</li>
+</ul>
+ </li>
</ul>
</li>
<li>Library:<ul>
=======================================
--- /okapi-ui/swt/steps/leveraging-ui/src/main/java/net/sf/okapi/steps/leveraging/ui/ParametersEditor.java Wed May 25 10:26:26 2011
+++ /okapi-ui/swt/steps/leveraging-ui/src/main/java/net/sf/okapi/steps/leveraging/ui/ParametersEditor.java Sat Mar 3 06:51:27 2012
@@ -1,5 +1,5 @@

/*===========================================================================
- Copyright (C) 2009-2010 by the Okapi Framework contributors
+ Copyright (C) 2009-2012 by the Okapi Framework contributors

-----------------------------------------------------------------------------
This library is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
@@ -62,6 +62,8 @@
private IHelp help;
private Composite mainComposite;
private IContext context;
+ private Label stNoQueryThreshold;
+ private Spinner spnNoQueryThreshold;
private Label stThreshold;
private Spinner spnThreshold;
private Button chkDowngradeIBM;
@@ -198,6 +200,15 @@
gdTmp.horizontalSpan = 2;
pnlConnector.setLayoutData(gdTmp);

+ stNoQueryThreshold = new Label(mainComposite, SWT.NONE);
+ stNoQueryThreshold.setText("Do not query if there is already a candidate with a score equals to or above:");
+
+ spnNoQueryThreshold = new Spinner(mainComposite, SWT.BORDER);
+ spnNoQueryThreshold.setMinimum(0);
+ spnNoQueryThreshold.setMaximum(101); // Allow 101 to allow to always query
+ spnNoQueryThreshold.setIncrement(1);
+ spnNoQueryThreshold.setPageIncrement(10);
+
stThreshold = new Label(mainComposite, SWT.NONE);
stThreshold.setText("Accept matches that are equal or above this score:");

@@ -342,6 +353,8 @@
private void updateOptionsDisplay () {
boolean enabled = chkLeverage.getSelection();
pnlConnector.setEnabled(enabled);
+ stNoQueryThreshold.setEnabled(enabled);
+ spnNoQueryThreshold.setEnabled(enabled);
stThreshold.setEnabled(enabled);
spnThreshold.setEnabled(enabled);
chkDowngradeIBM.setEnabled(enabled);
@@ -392,6 +405,7 @@
private void setData () {
chkLeverage.setSelection(params.getLeverage());
pnlConnector.setData(params.getResourceClassName(), params.getResourceParameters());
+ spnNoQueryThreshold.setSelection(params.getNoQueryThreshold());
spnThreshold.setSelection(params.getThreshold());
chkDowngradeIBM.setSelection(params.getDowngradeIdenticalBestMatches());
chkFillTarget.setSelection(params.getFillTarget());
@@ -433,6 +447,7 @@

params.setResourceClassName(pnlConnector.getConnectorClass());
params.setResourceParameters(pnlConnector.getConnectorParameters());
+ params.setNoQueryThreshold(spnNoQueryThreshold.getSelection());
params.setThreshold(spnThreshold.getSelection());
params.setDowngradeIdenticalBestMatches(chkDowngradeIBM.getSelection());
params.setFillTarget(chkFillTarget.getSelection());
=======================================
--- /okapi/core/src/main/java/net/sf/okapi/common/query/IQuery.java Wed Aug 31 04:35:04 2011
+++ /okapi/core/src/main/java/net/sf/okapi/common/query/IQuery.java Sat Mar 3 06:51:27 2012
@@ -180,4 +180,19 @@
* @return the weight for this connector
*/
public int getWeight();
-}
+
+ /**
+ * Sets the threshold for when to avoid executing a query.
+ * <p>If the entry to leverage has already one candidate with a score equals or above this value,
+ * no query is performed. Use 101 to always allow the query.
+ * @param noQueryThreshold no-query threshold (a value between 0 and 101).
+ */
+ public void setNoQueryThreshold (int noQueryThreshold);
+
+ /**
+ * Gets the threshold for when to avoid a query.
+ * @return the current no-query threshold.
+ */
+ public int getNoQueryThreshold ();
+
+}
=======================================
--- /okapi/libraries/lib-translation/src/main/java/net/sf/okapi/lib/translation/BaseConnector.java Fri Sep 9 14:06:22 2011
+++ /okapi/libraries/lib-translation/src/main/java/net/sf/okapi/lib/translation/BaseConnector.java Sat Mar 3 06:51:27 2012
@@ -1,5 +1,5 @@

/*===========================================================================
- Copyright (C) 2010-2011 by the Okapi Framework contributors
+ Copyright (C) 2010-2012 by the Okapi Framework contributors

-----------------------------------------------------------------------------
This library is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
@@ -28,6 +28,7 @@
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.IResource;
import net.sf.okapi.common.LocaleId;
+import net.sf.okapi.common.annotation.AltTranslation;
import net.sf.okapi.common.annotation.AltTranslationsAnnotation;
import net.sf.okapi.common.query.IQuery;
import net.sf.okapi.common.query.QueryResult;
@@ -52,6 +53,7 @@
protected QueryResult result;
protected int current = -1;
private int weight;
+ private int noQueryThreshold = 101;

@Override
public LocaleId getSourceLanguage () {
@@ -169,6 +171,17 @@
// Skip segments with no text
if ( !srcSeg.text.hasText(false) ) continue;

+ // Check for existing candidates
+ // So we optionally do not query resources if it's not needed
+ Segment ts = null;
+ if ( trgCont.hasBeenSegmented() ) {
+ ts = trgSegs.get(srcSeg.getId());
+ if ( hasAlreadyCandidate(ts, null) ) continue;
+ }
+ else {
+ if ( hasAlreadyCandidate(null, trgCont) ) continue;
+ }
+
// Do the query for the source segment
query(srcSeg.text);
// Then process each result
@@ -180,8 +193,8 @@
TextUnitUtil.copySrcCodeDataToMatchingTrgCodes(srcSeg.text, qr.target, true, false, null, tu);

if ( trgCont.hasBeenSegmented() ) {
- // Get corresponding target segment
- Segment ts = trgSegs.get(srcSeg.getId());
+ // Get corresponding target segment is done already
+ // Create it if needed
if ( ts == null ) {
ts = new Segment(srcSeg.id, new TextFragment(""));
trgSegs.append(ts);
@@ -202,6 +215,25 @@
}
}
}
+
+ /**
+ * Checks if the segment or container has already a translation candidate
+ * with a score equal or above a given value.
+ * @param seg the segment entry (or null to use the container, or if there is no segment)
+ * @param tc the container entry (or null to use the segment)
+ * @return true if the entry has at least one candidate with a score equal or above the given threshold.
+ */
+ private boolean hasAlreadyCandidate (Segment seg,
+ TextContainer tc)
+ {
+ AltTranslationsAnnotation ann = null;
+ if ( seg != null ) ann = seg.getAnnotation(AltTranslationsAnnotation.class);
+ else if ( tc != null ) ann = tc.getAnnotation(AltTranslationsAnnotation.class);
+ if ( ann == null ) return false;
+ AltTranslation alt = ann.getFirst();
+ if ( alt == null ) return false;
+ return (alt.getCombinedScore() >= noQueryThreshold);
+ }

/**
* Slow default implementation using leverage(TextUnit).
@@ -214,7 +246,17 @@
leverage(tu);
}
}
-
+
+ @Override
+ public void setNoQueryThreshold (int noQueryThreshold) {
+ this.noQueryThreshold = noQueryThreshold;
+ }
+
+ @Override
+ public int getNoQueryThreshold () {
+ return noQueryThreshold;
+ }
+
/**
* Call this method inside the overriding {@link #leverage(ITextUnit)} method
* of the derived class, if that class offers a fast {@link #batchQuery(List)} method.
@@ -237,13 +279,33 @@
protected void batchLeverageUsingBatchQuery (List<ITextUnit> tuList) {
// Gather all fragments in a list
ArrayList<TextFragment> frags = new ArrayList<TextFragment>();
+ ArrayList<String> fragsIds = new ArrayList<String>();
+
for ( ITextUnit tu : tuList ) {
// Skip non-translatable
if ( !tu.isTranslatable() ) continue;
+
+ // Check if we need to query
+ ISegments trgSegs = null;
+ TextContainer trgCont = tu.getTarget(getTargetLanguage()); // Null if it does not exists
+ if ( trgCont != null ) trgSegs = trgCont.getSegments();
+
// We assume here that if there is a target content it match the segmentation of the source
// Create an empty target (or return existing target)
for ( Segment srcSeg : tu.getSource().getSegments() ) {
+
+ // Check for existing candidates
+ // So we optionally do not query resources if it's not needed
+ if (( trgSegs != null ) && trgCont.hasBeenSegmented() ) {
+ Segment ts = trgSegs.get(srcSeg.getId());
+ if ( hasAlreadyCandidate(ts, null) ) continue;
+ }
+ else {
+ if ( hasAlreadyCandidate(null, trgCont) ) continue;
+ }
+
frags.add(srcSeg.text);
+ fragsIds.add(tu.getId()+"_"+srcSeg.getId());
}
}

@@ -260,6 +322,11 @@
TextContainer trgCont = tu.createTarget(getTargetLanguage(), false, IResource.COPY_SEGMENTATION);
ISegments trgSegs = trgCont.getSegments();
for ( Segment srcSeg : tu.getSource().getSegments() ) {
+
+ // Check if this entry was queried
+ if ( !fragsIds.contains(tu.getId()+"_"+srcSeg.getId()) ) {
+ continue;
+ }

// Get the list of translation for that segment
List<QueryResult> resList = allResults.get(++transIndex);
=======================================
--- /okapi/libraries/lib-translation/src/main/java/net/sf/okapi/lib/translation/QueryManager.java Mon Oct 3 10:18:56 2011
+++ /okapi/libraries/lib-translation/src/main/java/net/sf/okapi/lib/translation/QueryManager.java Sat Mar 3 06:51:27 2012
@@ -55,6 +55,7 @@
private int exactBestMatches;
private int fuzzyBestMatches;
private String rootDir;
+ private int noQueryThreshold = 101;
// Options
private int thresholdToFill = Integer.MAX_VALUE;
private boolean keepIfNotEmpty = true; // == false for actual option
@@ -108,6 +109,7 @@
// Add the resource
int id = addResource(connector, resourceName);
// Set the parameters and open
+ connector.setNoQueryThreshold(noQueryThreshold);
connector.setRootDirectory(rootDir); // Before open()
connector.setParameters(params);
connector.open();
@@ -476,6 +478,19 @@
}
}

+ /**
+ * Sets the no-query threshold for this query manager.
+ * and all the translation resources it holds.
+ * @param noQueryThreshold the value of the no-query threshold (between 0 and 101).
+ * Use 101 to always allow the query.
+ */
+ public void setNoQueryThreshold (int noQueryThreshold) {
+ this.noQueryThreshold = noQueryThreshold;
+ for ( ResourceItem ri : resList.values() ) {
+ ri.query.setNoQueryThreshold(this.noQueryThreshold);
+ }
+ }
+
/**
* Sets the options for performing the leverage.
* @param thresholdToFill if the first match has a score equal or above this value,
@@ -514,18 +529,19 @@
* Leverages a text unit (segmented or not) based on the current settings.
* Any options or attributes needed must be set before calling this method.
* @param tu the text unit to leverage.
+ * @see #setAttribute(String, String)
+ * @see #setLanguages(LocaleId, LocaleId)
+ * @see #setMaximumHits(int)
+ * @see #setNoQueryThreshold(int)
* @see #setOptions(int, boolean, boolean, boolean, String, int, boolean)
+ * @see #setRootDirectory(String)
+ * @see #setThreshold(int)
*/
- public void leverage (ITextUnit tu)
-// int thresholdToFill,
-// boolean downgradeIdenticalBestMatches,
-// String targetPrefix,
-// int thresholdToPrefix)
- {
+ public void leverage (ITextUnit tu) {
if ( !tu.isTranslatable() ) {
return;
- }
-
+ }
+
totalSegments += tu.getSource().getSegments().count();

// Query each translation resource
=======================================
--- /okapi/steps/leveraging/src/main/java/net/sf/okapi/steps/leveraging/LeveragingStep.java Mon Aug 22 07:25:55 2011
+++ /okapi/steps/leveraging/src/main/java/net/sf/okapi/steps/leveraging/LeveragingStep.java Sat Mar 3 06:51:27 2012
@@ -189,6 +189,7 @@

// Else: initialize the global variables
qm = new QueryManager();
+ qm.setNoQueryThreshold(params.getNoQueryThreshold());
qm.setThreshold(params.getThreshold());
qm.setRootDirectory(rootDir);

=======================================
--- /okapi/steps/leveraging/src/main/java/net/sf/okapi/steps/leveraging/Parameters.java Wed May 18 13:59:18 2011
+++ /okapi/steps/leveraging/src/main/java/net/sf/okapi/steps/leveraging/Parameters.java Sat Mar 3 06:51:27 2012
@@ -1,5 +1,5 @@

/*===========================================================================
- Copyright (C) 2009-2010 by the Okapi Framework contributors
+ Copyright (C) 2009-2012 by the Okapi Framework contributors

-----------------------------------------------------------------------------
This library is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
@@ -25,6 +25,8 @@
public class Parameters extends BaseParameters {

private static final String LEVERAGE = "leverage";
+ private static final String NOQUERYTHRESHOLD = "noQueryThreshold";
+ private static final String THRESHOLD = "threshold";
private static final String FILLTARGET = "fillTarget";
private static final String FILLTARGETTHRESHOLD = "fillTargetThreshold";
private static final String FILLIFTARGETISEMPTY = "fillIfTargetIsEmpty";
@@ -40,6 +42,7 @@

private String resourceClassName;
private String resourceParameters;
+ private int noQueryThreshold;
private int threshold;
private boolean fillTarget;
private int fillTargetThreshold;
@@ -98,6 +101,14 @@
public void setResourceParameters (String resourceParameters) {
this.resourceParameters = resourceParameters;
}
+
+ public int getNoQueryThreshold () {
+ return noQueryThreshold;
+ }
+
+ public void setNoQueryThreshold (int noQuerythreshold) {
+ this.noQueryThreshold = noQuerythreshold;
+ }

public int getThreshold () {
return threshold;
@@ -192,6 +203,7 @@
leverage = true;
resourceClassName = "net.sf.okapi.connectors.pensieve.PensieveTMConnector";
resourceParameters = null;
+ noQueryThreshold = 101;
threshold = 95;
fillTarget = true;
fillTargetThreshold = 95;
@@ -213,7 +225,8 @@
buffer.fromString(data);
resourceClassName = buffer.getString("resourceClassName", resourceClassName);
resourceParameters = buffer.getGroup("resourceParameters", resourceParameters);
- threshold = buffer.getInteger("threshold", threshold);
+ noQueryThreshold = buffer.getInteger(NOQUERYTHRESHOLD, noQueryThreshold);
+ threshold = buffer.getInteger(THRESHOLD, threshold);
fillTarget = buffer.getBoolean(FILLTARGET, fillTarget);
fillTargetThreshold = buffer.getInteger(FILLTARGETTHRESHOLD, fillTargetThreshold);
fillIfTargetIsEmpty = buffer.getBoolean(FILLIFTARGETISEMPTY, fillIfTargetIsEmpty);
@@ -234,7 +247,8 @@
buffer.reset();
buffer.setString("resourceClassName", resourceClassName);
buffer.setGroup("resourceParameters", resourceParameters);
- buffer.setInteger("threshold", threshold);
+ buffer.setInteger(NOQUERYTHRESHOLD, noQueryThreshold);
+ buffer.setInteger(THRESHOLD, threshold);
buffer.setBoolean(FILLTARGET, fillTarget);
buffer.setInteger(FILLTARGETTHRESHOLD, fillTargetThreshold);
buffer.setBoolean(FILLIFTARGETISEMPTY, fillIfTargetIsEmpty);

Reply all
Reply to author
Forward
0 new messages