[google-caja] r3931 committed - Issue 1180: Fix URI resolution and extract javascript URLs....

1 view
Skip to first unread message

googl...@googlecode.com

unread,
Dec 24, 2009, 2:21:14 AM12/24/09
to caja....@gmail.com
Revision: 3931
Author: mikes...@gmail.com
Date: Wed Dec 23 23:20:46 2009
Log: Issue 1180: Fix URI resolution and extract javascript URLs.
http://codereview.appspot.com/180091

Previously, we resolved URIs in HTML inconsistently.
We completely ignored <base href> URIs, and instead got the URI
from the FilePosition. This is problematic for content parsed
without debugging on. This CL does not entirely address that problem
but adds a pass so that when the base URI is known, we correctly
apply it.

This change also consolidates code that finds and parses embedded
content: styles and scripts in HTML.

A following CL will have to thread the base URI reliably through from
our external APIs. That will require coordination with clients, so is
out of scope for this change.

R=jas...@gmail.com

http://code.google.com/p/google-caja/source/detail?r=3931

Added:
/trunk/src/com/google/caja/ancillary/servlet/GuessContentType.java
/trunk/src/com/google/caja/lexer/UriDecoder.java
/trunk/src/com/google/caja/plugin/stages/EmbeddedContent.java
/trunk/src/com/google/caja/plugin/stages/HtmlEmbeddedContentFinder.java
/trunk/src/com/google/caja/plugin/stages/ResolveUriStage.java
/trunk/src/com/google/caja/util/ContentType.java
/trunk/tests/com/google/caja/plugin/stages/ResolveUriStageTest.java
Deleted:
/trunk/src/com/google/caja/ancillary/servlet/ContentType.java
Modified:
/trunk/src/com/google/caja/ancillary/servlet/CajaWebToolsServlet.java
/trunk/src/com/google/caja/ancillary/servlet/Content.java
/trunk/src/com/google/caja/ancillary/servlet/IndexPage.java
/trunk/src/com/google/caja/ancillary/servlet/Input.java
/trunk/src/com/google/caja/ancillary/servlet/Job.java
/trunk/src/com/google/caja/ancillary/servlet/Processor.java
/trunk/src/com/google/caja/ancillary/servlet/Request.java
/trunk/src/com/google/caja/ancillary/servlet/StaticFiles.java
/trunk/src/com/google/caja/ancillary/servlet/UriFetcher.java
/trunk/src/com/google/caja/demos/playground/client/PlaygroundService.java
/trunk/src/com/google/caja/lexer/CharProducer.java
/trunk/src/com/google/caja/lexer/escaping/UriUtil.java
/trunk/src/com/google/caja/parser/css/CssParser.java
/trunk/src/com/google/caja/parser/html/AttribKey.java
/trunk/src/com/google/caja/parser/html/ElKey.java
/trunk/src/com/google/caja/parser/js/Block.java
/trunk/src/com/google/caja/plugin/PluginCompiler.java
/trunk/src/com/google/caja/plugin/stages/RewriteHtmlStage.java
/trunk/src/com/google/caja/plugin/templates/HtmlAttributeRewriter.java
/trunk/src/com/google/caja/plugin/templates/SafeHtmlMaker.java
/trunk/src/com/google/caja/plugin/templates/TemplateCompiler.java
/trunk/tests/com/google/caja/ancillary/servlet/ZipFileSystemTest.java
/trunk/tests/com/google/caja/lexer/CharProducerTest.java
/trunk/tests/com/google/caja/opensocial/example-rewritten.xml
/trunk/tests/com/google/caja/plugin/domita_test_untrusted.html
/trunk/tests/com/google/caja/plugin/stages/RewriteHtmlStageTest.java
/trunk/tests/com/google/caja/plugin/templates/TemplateCompilerTest.java

=======================================
--- /dev/null
+++ /trunk/src/com/google/caja/ancillary/servlet/GuessContentType.java Wed
Dec 23 23:20:46 2009
@@ -0,0 +1,85 @@
+// Copyright (C) 2009 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.caja.ancillary.servlet;
+
+import com.google.caja.lexer.CharProducer;
+import com.google.caja.lexer.CssLexer;
+import com.google.caja.lexer.CssTokenType;
+import com.google.caja.lexer.FilePosition;
+import com.google.caja.lexer.ParseException;
+import com.google.caja.lexer.Token;
+import com.google.caja.reporting.DevNullMessageQueue;
+import com.google.caja.util.ContentType;
+
+/**
+ * Guesses a content type from the reported mime-type, the file name, and
+ * finally the content of the file itself.
+ *
+ * @author mikes...@gmail.com
+ */
+class GuessContentType {
+
+  /**
+   * Tries each source of evidence in turn and stops at the first one that
+   * yields an answer: the mime-type, then the extension of {@code path},
+   * then the first non-whitespace character of {@code code}, and lastly an
+   * attempt to lex {@code code} as CSS.
+   *
+   * @param mimeType a reported mime-type, or null if none is known.
+   * @param path a file path, or null if none is known.
+   * @param code the content, or null if it is not available.
+   * @return null if no source of evidence yielded a type.
+   */
+  static ContentType guess(String mimeType, String path, CharSequence code) {
+    ContentType guessed = mimeType == null
+        ? null : ContentType.fromMimeType(mimeType);
+    if (guessed == null && path != null) {
+      guessed = fromExtension(path);
+    }
+    if (guessed != null || code == null) { return guessed; }
+    guessed = fromFirstChar(code);
+    return guessed != null ? guessed : fromLexing(code);
+  }
+
+  /** The type whose extension matches the text after the last dot. */
+  private static ContentType fromExtension(String path) {
+    int dot = path.lastIndexOf('.');
+    if (dot < 0) { return null; }
+    String ext = path.substring(dot + 1);
+    for (ContentType candidate : ContentType.values()) {
+      if (ext.equals(candidate.ext)) { return candidate; }
+    }
+    return null;
+  }
+
+  /** Classifies content by its first non-whitespace character. */
+  private static ContentType fromFirstChar(CharSequence code) {
+    char first = '\0';
+    int n = code.length();
+    for (int i = 0; i < n; ++i) {
+      first = code.charAt(i);
+      if (!Character.isWhitespace(first)) { break; }
+    }
+    if (first == '<') { return ContentType.HTML; }
+    if (first == '@' || first == '.' || first == '#') {
+      return ContentType.CSS;
+    }
+    return null;
+  }
+
+  /**
+   * Lexes the content as CSS.  A lexing failure or the appearance of a
+   * common JS keyword means JS; otherwise the content is taken to be CSS.
+   */
+  private static ContentType fromLexing(CharSequence code) {
+    CharProducer cp = CharProducer.Factory.fromString(
+        code, FilePosition.UNKNOWN);
+    try {
+      CssLexer cssLexer = new CssLexer(
+          cp, DevNullMessageQueue.singleton(), false);
+      while (cssLexer.hasNext()) {
+        Token<CssTokenType> t = cssLexer.next();
+        boolean jsKeyword = "if".equals(t.text) || "while".equals(t.text)
+            || "for".equals(t.text) || "return".equals(t.text);
+        if (jsKeyword) { return ContentType.JS; }
+      }
+      return ContentType.CSS;
+    } catch (ParseException ex) {
+      return ContentType.JS;
+    }
+  }
+}
=======================================
--- /dev/null
+++ /trunk/src/com/google/caja/lexer/UriDecoder.java Wed Dec 23 23:20:46
2009
@@ -0,0 +1,136 @@
+// Copyright (C) 2009 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.caja.lexer;
+
+/**
+ * Decodes url-encoded content as specified in section 2.4 of
+ * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
+ *
+ * @author mikes...@gmail.com
+ */
+final class UriDecoder extends DecodingCharProducer.Decoder {
+  /**
+   * Decodes the character that starts at {@code offset}.  When that
+   * character and the one after it decode to a UTF-16 high surrogate
+   * followed by a low surrogate, the two are combined into a single
+   * supplementary code point.
+   * The result is left in {@code codePoint} and {@code end}.
+   */
+  @Override
+  public void decode(char[] chars, int offset, int limit) {
+    decodeOneChar(chars, offset, limit);
+
+    int first = codePoint;
+    int firstEnd = end;
+    // Only a value that fits in a char can be a surrogate.  Casting a
+    // supplementary code point such as U+1D800 to char truncates it to
+    // 0xD800, which would wrongly look like a high surrogate.
+    if (first > Character.MAX_VALUE || firstEnd >= limit
+        || !Character.isHighSurrogate((char) first)) {
+      return;
+    }
+
+    decodeOneChar(chars, firstEnd, limit);
+    int second = codePoint;
+    if (second <= Character.MAX_VALUE
+        && Character.isLowSurrogate((char) second)) {
+      codePoint = Character.toCodePoint((char) first, (char) second);
+    } else {
+      // Not a pair: report the first character on its own.
+      codePoint = first;
+      end = firstEnd;
+    }
+  }
+
+  /**
+   * Decodes the single character or %-escape sequence that starts at
+   * {@code offset}, leaving the decoded value in {@code codePoint} and the
+   * index just past the consumed input in {@code end}.
+   * <p>
+   * A run of escapes is combined into one code point only when it forms a
+   * 2 to 4 byte UTF-8 sequence that is not an overlong encoding and does
+   * not exceed {@code Character.MAX_CODE_POINT}.  Three byte sequences
+   * that encode surrogates are still accepted so that {@code decode} can
+   * pair them.  Any other {@code %XX} escape decodes to the value of that
+   * one byte, and a {@code %} that is not followed by two hex digits is
+   * passed through unchanged.
+   */
+  private void decodeOneChar(char[] chars, final int offset, int limit) {
+    // URI encoding is odd in that it encodes a byte sequence in a char
+    // string, so we cannot use the normal hex decoding which produces a
+    // code-unit.
+    // This is correct for UTF-8 encoded byte sequences which is what the
+    // JS encodeURIComponent and decodeURIComponent builtins do.
+    // For other encoding schemes, this may not work as the URI author
+    // intended but there is no standard to determine the encoding of a URL
+    // unless that URL is part of an HTTP envelope.
+    int end = offset + 1;
+    int codepoint = chars[offset];
+    if (codepoint == '%') {
+      int b = toByte(chars, offset, limit);
+      if (b >= 0) {
+        // Unless a longer sequence is recognized below, the escape stands
+        // for the value of this one byte.
+        codepoint = b;
+        end += 2;
+        if ((b & 0x80) != 0) {
+          int nExtra;  // Number of continuation bytes the lead byte needs.
+          int prefix;  // Payload bits of the lead byte.
+          int min;     // Smallest code point that requires this length.
+          if ((b & 0xe0) == 0xc0) {
+            nExtra = 1; prefix = b & 0x1f; min = 0x80;
+          } else if ((b & 0xf0) == 0xe0) {
+            nExtra = 2; prefix = b & 0xf; min = 0x800;
+          } else if ((b & 0xf8) == 0xf0) {
+            nExtra = 3; prefix = b & 0x7; min = 0x10000;
+          } else {
+            nExtra = 0; prefix = 0; min = 0;
+          }
+          if (nExtra != 0) {
+            // If it is a valid multi-byte UTF-8 sequence, treat it as such.
+            int result = fromUtf8(prefix, nExtra, chars, offset, limit);
+            // Reject overlong forms such as %C0%BC, which would smuggle in
+            // an ASCII '<', and values that are not Unicode code points.
+            if (result >= min && result <= Character.MAX_CODE_POINT) {
+              codepoint = result;
+              end += 3 * nExtra;
+            }
+          }
+        }
+      }
+    }
+    this.codePoint = codepoint;
+    this.end = end;
+  }
+
+  /**
+   * Reads the two hex digits that follow a '%'.
+   * @param offset the position of a '%' in chars.
+   * @return -1 if there are not enough characters before {@code limit} to
+   *     form a 2 digit hex sequence or if either hex digit is invalid.
+   *     Otherwise, a number in [0, 255].
+   */
+  private static int toByte(char[] chars, int offset, int limit) {
+    if (offset + 3 > limit) { return -1; }
+    int hi = fromHex(chars[offset + 1]);
+    // The second digit is only examined once the first is known good.
+    int lo = hi < 0 ? -1 : fromHex(chars[offset + 2]);
+    return lo < 0 ? -1 : ((hi << 4) | lo);
+  }
+
+  /**
+   * The numeric value of a hex digit or -1 if ch is not a hex digit.
+   * Both upper and lower case letters are accepted.
+   */
+  private static int fromHex(char ch) {
+    if ('0' <= ch && ch <= '9') { return ch - '0'; }
+    if ('a' <= ch && ch <= 'f') { return ch - 'a' + 10; }
+    if ('A' <= ch && ch <= 'F') { return ch - 'A' + 10; }
+    return -1;
+  }
+
+  /**
+   * The value of a URI encoded UTF-8 sequence like "%C4%A3".
+   * @param prefix the significant bits in the first byte of the UTF-8
+   *     sequence.
+   * @param nChunks the number of extra bytes in the sequence.
+   * @param offset the position of the % that starts the UTF-8 sequence;
+   *     the first % in the example above.
+   * @return -1 if the extra bytes do not fit before {@code limit} or if
+   *     any of them is not a {@code %XX} escape of the form 10xxxxxx.
+   */
+  private static int fromUtf8(
+      int prefix, int nChunks, char[] chars, int offset, int limit) {
+    // The first continuation escape starts right after the lead "%XX".
+    int start = offset + 3;
+    int stop = start + 3 * nChunks;
+    if (stop > limit) { return -1; }
+    int value = prefix;
+    for (int pos = start; pos < stop; pos += 3) {
+      if (chars[pos] != '%') { return -1; }
+      int hi = fromHex(chars[pos + 1]);
+      // The top two bits of a continuation byte must be 10.
+      if (hi < 0 || (hi & 0xc) != 0x8) { return -1; }
+      int lo = fromHex(chars[pos + 2]);
+      if (lo < 0) { return -1; }
+      value = (value << 6) | (((hi & 0x3) << 4) | lo);
+    }
+    return value;
+  }
+}
=======================================
--- /dev/null
+++ /trunk/src/com/google/caja/plugin/stages/EmbeddedContent.java Wed Dec
23 23:20:46 2009
@@ -0,0 +1,120 @@
+// Copyright (C) 2009 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.caja.plugin.stages;
+
+import com.google.caja.SomethingWidgyHappenedError;
+import com.google.caja.lexer.CharProducer;
+import com.google.caja.lexer.ExternalReference;
+import com.google.caja.lexer.FilePosition;
+import com.google.caja.lexer.ParseException;
+import com.google.caja.parser.ParseTreeNode;
+import com.google.caja.parser.css.CssParser;
+import com.google.caja.parser.css.CssTree;
+import com.google.caja.parser.js.Block;
+import com.google.caja.parser.js.Parser;
+import com.google.caja.plugin.PluginEnvironment;
+import com.google.caja.reporting.MessageQueue;
+import com.google.caja.util.ContentType;
+import com.google.caja.util.Function;
+
+import java.util.Collections;
+
+import org.w3c.dom.Node;
+
+/**
+ * Content in another language extracted from an HTML document.
+ *
+ * @author mikes...@gmail.com
+ */
+public final class EmbeddedContent {
+ private final HtmlEmbeddedContentFinder finder;
+ private final FilePosition pos;
+ private final Function<PluginEnvironment, CharProducer> getter;
+ private final ExternalReference contentLocation;
+ private final boolean deferred;
+ private final Node source;
+ private final ContentType type;
+
+ EmbeddedContent(
+ HtmlEmbeddedContentFinder finder, FilePosition pos,
+ Function<PluginEnvironment, CharProducer> getter,
+ ExternalReference contentLocation, boolean deferred, Node source,
+ ContentType type) {
+ this.finder = finder;
+ this.pos = pos;
+ this.getter = getter;
+ this.deferred = deferred;
+ this.contentLocation = contentLocation;
+ this.source = source;
+ this.type = type;
+ }
+
+ public FilePosition getPosition() { return pos; }
+ /**
+ * The message queue associated with the HtmlEmbeddedContentFinder that
+ * creates this instance will receive a message if fetching external
content
+ * failed.
+ * In this case, content will be returned that is semantically
equivalent,
+ * such as code to raise a JS exception to trigger <tt>onerror</tt>
+ * handlers.
+ */
+ public CharProducer getContent(PluginEnvironment env) {
+ return getter.apply(env);
+ }
+ /** Non null for remote content. */
+ public ExternalReference getContentLocation() { return contentLocation; }
+ public boolean isDeferred() { return deferred; }
+ public Node getSource() { return source; }
+ /**
+ * Returns a parse tree node containing the content. For content from
+ * elements this does not include any information from modifying
attributes
+ * such as the <tt>media</tt> attribute on {@code <link>} and
+ * {@code <style>} elements.
+ * @param mq receives messages about parsing problems but not about
+ * content fetching.
+ */
+ public ParseTreeNode parse(PluginEnvironment env, MessageQueue mq)
+ throws ParseException {
+ if (type == null) { return null; } // Malformed content
+ CharProducer cp = getContent(env);
+ FilePosition p = cp.filePositionForOffsets(cp.getOffset(),
cp.getLimit());
+ switch (type) {
+ case JS: {
+ Parser parser = finder.makeJsParser(cp, mq);
+ if (parser.getTokenQueue().isEmpty()) { return new Block(p); }
+ return parser.parse();
+ }
+ case CSS: {
+ CssParser parser = finder.makeCssParser(cp, mq);
+ if (source.getNodeType() == Node.ELEMENT_NODE) {
+ if (parser.getTokenQueue().isEmpty()) {
+ return new CssTree.StyleSheet(
+ p, Collections.<CssTree.CssStatement>emptyList());
+ }
+ return parser.parseStyleSheet();
+ } else {
+ if (parser.getTokenQueue().isEmpty()) {
+ return new CssTree.DeclarationGroup(
+ p, Collections.<CssTree.Declaration>emptyList());
+ }
+ return parser.parseDeclarationGroup();
+ }
+ }
+ default: throw new SomethingWidgyHappenedError(type.toString());
+ }
+ }
+ /** Null for bad content. */
+ public ContentType getType() { return type; }
+}
=======================================
--- /dev/null
+++ /trunk/src/com/google/caja/plugin/stages/HtmlEmbeddedContentFinder.java
Wed Dec 23 23:20:46 2009
@@ -0,0 +1,418 @@
+// Copyright (C) 2009 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.caja.plugin.stages;
+
+import com.google.caja.SomethingWidgyHappenedError;
+import com.google.caja.lang.html.HTML;
+import com.google.caja.lang.html.HtmlSchema;
+import com.google.caja.lexer.CharProducer;
+import com.google.caja.lexer.CssTokenType;
+import com.google.caja.lexer.ExternalReference;
+import com.google.caja.lexer.FilePosition;
+import com.google.caja.lexer.InputSource;
+import com.google.caja.lexer.JsLexer;
+import com.google.caja.lexer.JsTokenQueue;
+import com.google.caja.lexer.TokenQueue;
+import com.google.caja.lexer.escaping.UriUtil;
+import com.google.caja.parser.css.CssParser;
+import com.google.caja.parser.html.AttribKey;
+import com.google.caja.parser.html.ElKey;
+import com.google.caja.parser.html.Nodes;
+import com.google.caja.parser.js.Parser;
+import com.google.caja.parser.js.StringLiteral;
+import com.google.caja.plugin.PluginEnvironment;
+import com.google.caja.plugin.PluginMessageType;
+import com.google.caja.reporting.Message;
+import com.google.caja.reporting.MessageContext;
+import com.google.caja.reporting.MessageLevel;
+import com.google.caja.reporting.MessagePart;
+import com.google.caja.reporting.MessageQueue;
+import com.google.caja.util.ContentType;
+import com.google.caja.util.Function;
+import com.google.caja.util.Lists;
+import com.google.caja.util.Strings;
+
+import java.io.StringReader;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.List;
+
+import org.w3c.dom.Attr;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.Text;
+
+/**
+ * Finds embedded styles and scripts in a DOM.
+ * For example, finds all attributes, text content, and external
+ * content in:
+ * <pre>
+ * &lt;script src=foo.js&gt;&lt;/script&gt;
+ * &lt;script&gt;foo()&lt;/script&gt;
+ * &lt;link rel=stylesheet href=bar.css&gt;
+ * &lt;style&gt;foo { color: red }&lt;/style&gt;
+ * &lt;a href="javascript:clicked()" onmousedown="baz()"
+ *    style="color:red"&gt;
+ * </pre>
+ *
+ * @author mikes...@gmail.com
+ */
+public class HtmlEmbeddedContentFinder {
+ private final HtmlSchema schema;
+ private final URI baseUri;
+ private final MessageQueue mq;
+ private final MessageContext mc;
+
+  /**
+   * @param schema used to look up attribute types and defaults.
+   * @param baseUri null, or an absolute hierarchical URI against which
+   *     relative external references are resolved.
+   * @param mq receives messages about content that cannot be used.
+   * @param mc used to register and abbreviate input sources.
+   */
+  public HtmlEmbeddedContentFinder(
+      HtmlSchema schema, URI baseUri, MessageQueue mq, MessageContext mc) {
+    assert schema != null;
+    assert mq != null;
+    assert baseUri == null
+        || (baseUri.isAbsolute() && !baseUri.isOpaque());
+    this.schema = schema;
+    this.baseUri = baseUri;
+    this.mq = mq;
+    this.mc = mc;
+  }
+
+  /**
+   * Collects the embedded content found in the subtree rooted at
+   * {@code node}.
+   */
+  public Iterable<EmbeddedContent> findEmbeddedContent(Node node) {
+    List<EmbeddedContent> found = Lists.newArrayList();
+    findEmbeddedContent(node, found);
+    return found;
+  }
+
+ private static final ElKey LINK = ElKey.forHtmlElement("link");
+ private static final ElKey SCRIPT = ElKey.forHtmlElement("script");
+ private static final ElKey STYLE = ElKey.forHtmlElement("style");
+ private static final AttribKey LINK_HREF = AttribKey.forHtmlAttrib(
+ LINK, "href");
+ private static final AttribKey LINK_REL = AttribKey.forHtmlAttrib(
+ LINK, "rel");
+ private static final AttribKey SCRIPT_DEFER = AttribKey.forHtmlAttrib(
+ ElKey.HTML_WILDCARD, "defer");
+ private static final AttribKey SCRIPT_SRC = AttribKey.forHtmlAttrib(
+ SCRIPT, "src");
+ private static final AttribKey TYPE = AttribKey.forHtmlAttrib(
+ ElKey.HTML_WILDCARD, "type");
+
+ private void findEmbeddedContent(Node node, List<EmbeddedContent> out) { // Pre-order walk of node's subtree; findings are appended to out.
+ if (node instanceof Element) {
+ Element el = (Element) node;
+ ElKey key = ElKey.forElement(el);
+ ContentType expected = null; // stays null unless the element itself carries content
+ ExternalReference extRef = null;
+ String defaultMimeType = null;
+ boolean deferred = false;
+ if (SCRIPT.equals(key)) { // <script>: inline body, or external when src is usable
+ expected = ContentType.JS;
+ extRef = externalReferenceFromAttr(el, SCRIPT_SRC);
+ deferred = Strings.equalsIgnoreCase(
+ "defer",
+ el.getAttributeNS(SCRIPT_DEFER.ns.uri,
SCRIPT_DEFER.localName));
+ } else if (STYLE.equals(key)) { // <style>: inline stylesheet
+ expected = ContentType.CSS;
+ } else if (LINK.equals(key)
+ && Strings.equalsIgnoreCase(
+ "stylesheet",
+ el.getAttributeNS(LINK_REL.ns.uri,
LINK_REL.localName))) {
+ extRef = externalReferenceFromAttr(el, LINK_HREF);
+ if (extRef != null) { // a stylesheet link without a usable href contributes nothing
+ expected = ContentType.CSS;
+ defaultMimeType = ContentType.CSS.mimeType;
+ }
+ }
+ if (expected != null) {
+ String mimeType = getMimeTypeFromHtmlTypeAttribute(el, key);
+ if (mimeType == null) { mimeType = defaultMimeType; }
+ ContentType actualType = mimeType != null
+ ? ContentType.fromMimeType(mimeType) : null;
+ if (actualType == expected) {
+ if (extRef == null) {
+ out.add(fromElementBody(el, expected, deferred));
+ } else {
+ out.add(fromExternalReference(el, expected, extRef, deferred));
+ }
+ } else { // declared type does not match: report it and record a placeholder
+ FilePosition typePos = Nodes.getFilePositionFor(el);
+ Attr a = el.getAttributeNodeNS(TYPE.ns.uri, TYPE.localName);
+ if (a != null) { // prefer the position of the type attribute's value
+ typePos = Nodes.getFilePositionForValue(a);
+ }
+ mq.addMessage(
+ PluginMessageType.UNRECOGNIZED_CONTENT_TYPE,
+ typePos, MessagePart.Factory.valueOf(mimeType), key); // NOTE(review): mimeType can be null here; confirm valueOf accepts null
+ out.add(fromBadContent(el));
+ }
+ }
+ for (Attr a : Nodes.attributesOf(el)) { // attributes are examined on every element
+ AttribKey aKey = AttribKey.forAttribute(key, a);
+ HTML.Attribute aInfo = schema.lookupAttribute(aKey);
+ if (aInfo != null) { // attributes unknown to the schema are skipped
+ switch (aInfo.getType()) {
+ case URI: // only javascript: URIs count as embedded code
+ String uriText = UriUtil.normalizeUri(a.getValue());
+ boolean isCode = false;
+ try {
+ URI uri = new URI(uriText);
+ if (Strings.equalsIgnoreCase("javascript",
uri.getScheme())) {
+ isCode = true;
+ }
+ } catch (URISyntaxException ex) {
+ // not code
+ }
+ if (isCode) {
+ out.add(fromAttrib(a, true, ContentType.JS)); break;
+ }
+ break;
+ // This should depend on the Content-Script-Type header:
+ //
http://www.w3.org/TR/REC-html40/interact/scripts.html#h-18.2.2
+ case SCRIPT:
+ out.add(fromAttrib(a, false, ContentType.JS));
+ break;
+ case STYLE:
+ out.add(fromAttrib(a, false, ContentType.CSS));
+ break;
+ default: break;
+ }
+ }
+ }
+ }
+ for (Node c = node.getFirstChild(); c != null; c = c.getNextSibling())
{
+ findEmbeddedContent(c, out); // recurse into each child
+ }
+ }
+
+ private EmbeddedContent fromExternalReference(
+ Element el, final ContentType t, final ExternalReference extRef,
+ boolean deferred) { // Wraps remote content; nothing is fetched until the getter is first applied.
+ return new EmbeddedContent(
+ this, extRef.getReferencePosition(),
+ new Function<PluginEnvironment, CharProducer>() {
+ boolean loaded; // true once a load has been attempted
+ CharProducer cp = null; // loaded content, or the stand-in built after a failed load
+ public CharProducer apply(PluginEnvironment env) {
+ if (!loaded) {
+ URI uri = extRef.getUri();
+ ExternalReference toLoad = extRef;
+ if (!uri.isAbsolute() && baseUri != null) { // resolve a relative reference against the base URI
+ toLoad = new ExternalReference(
+ baseUri.resolve(uri),
+ extRef.getReferencePosition());
+ }
+ cp = env.loadExternalResource(toLoad, t.mimeType);
+ mc.addInputSource(new InputSource(toLoad.getUri()));
+ loaded = true;
+ }
+ if (cp == null) { // load failed; reached once because the stand-in below is kept in cp
+ URI srcUri = extRef.getUri();
+ String errUri = srcUri.isAbsolute()
+ ? mc.abbreviate(new InputSource(srcUri)) :
srcUri.toString();
+ mq.addMessage(
+ PluginMessageType.FAILED_TO_LOAD_EXTERNAL_URL,
+ extRef.getReferencePosition(),
+ MessagePart.Factory.valueOf(errUri));
+ switch (t) {
+ case JS:
+ // Throw an exception so any user installed error handler
+ // will fire.
+ cp = CharProducer.Factory.fromString(
+ "throw new Error("
+ + StringLiteral.toQuotedValue("Failed to load " +
errUri)
+ + ");",
+ extRef.getReferencePosition());
+ break;
+ case CSS:
+ // Record the fact that the content failed to load.
+ cp = CharProducer.Factory.fromString(
+ "/* Failed to load "
+ + errUri.replaceAll("\\*/", "* /") + " */", // break up any "*/" so it cannot close the comment early
+ extRef.getReferencePosition());
+ break;
+ default:
+ throw new SomethingWidgyHappenedError(t.toString());
+ }
+ }
+ return cp.clone(); // callers receive a clone, never the cached producer itself
+ }
+ },
+ extRef, deferred, el, t);
+ }
+
+  /**
+   * Wraps the text content of {@code el} as embedded content of type
+   * {@code t}.  Comment markers are stripped only for JS content.
+   */
+  private EmbeddedContent fromElementBody(
+      Element el, ContentType t, boolean deferred) {
+    boolean isJs = t == ContentType.JS;
+    final CharProducer body = textNodesToCharProducer(el, isJs);
+    FilePosition bodyPos = body.filePositionForOffsets(0, body.getLimit());
+    Function<PluginEnvironment, CharProducer> getter
+        = new Function<PluginEnvironment, CharProducer>() {
+          public CharProducer apply(PluginEnvironment env) {
+            // Hand out a clone rather than the shared producer.
+            return body.clone();
+          }
+        };
+    return new EmbeddedContent(this, bodyPos, getter, null, deferred, el, t);
+  }
+
+ private EmbeddedContent fromAttrib(final Attr a, final boolean uriDecode,
+ ContentType t) { // Wraps an attribute value; uriDecode is passed as true for javascript: URIs.
+ final String rawValue = Nodes.getRawValue(a);
+ final String value = a.getValue();
+ return new EmbeddedContent(
+ this, Nodes.getFilePositionForValue(a),
+ new Function<PluginEnvironment, CharProducer>() {
+ CharProducer cp; // built on first use
+ public CharProducer apply(PluginEnvironment env) {
+ if (this.cp == null) {
+ CharProducer cp;
+ if (rawValue != null &&
Nodes.decode(rawValue).equals(value)) {
+ int n = rawValue.length();
+ String rawText = rawValue;
+ if (n >= 2) { // Strip quotes
+ char lastCh = rawText.charAt(n - 1);
+ if (lastCh == '"' || lastCh == '\'') {
+ if (rawText.charAt(0) == lastCh) {
+ rawText = " " + rawText.substring(1, n - 1) + " "; // quotes become spaces, presumably so offsets still line up with the source
+ } else {
+ rawText = rawText.substring(0, n - 1) + " ";
+ }
+ }
+ }
+ if (uriDecode) { rawText = blankOutScheme(rawText); } // overwrite everything up to and including the ':' with spaces
+ cp = CharProducer.Factory.fromHtmlAttribute(
+ CharProducer.Factory.fromString(
+ rawText, Nodes.getFilePositionForValue(a)));
+ } else { // no raw text, or it does not decode to the attribute value: use the decoded value
+ String decodedText = value;
+ if (uriDecode) { decodedText =
blankOutScheme(decodedText); }
+ cp = CharProducer.Factory.fromString(
+ decodedText, Nodes.getFilePositionForValue(a));
+ }
+ if (uriDecode) { // layer URI decoding on top of whichever producer was built
+ cp = CharProducer.Factory.fromUri(cp);
+ }
+ this.cp = cp;
+ }
+ return this.cp.clone(); // callers receive a clone, never the cached producer itself
+ }
+ },
+ null, false, a, t);
+ }
+
+  /**
+   * A placeholder for an element whose content cannot be used.  It has a
+   * null type and yields empty content positioned at the start of
+   * {@code el}.
+   */
+  private EmbeddedContent fromBadContent(Element el) {
+    final FilePosition start
+        = FilePosition.startOf(Nodes.getFilePositionFor(el));
+    Function<PluginEnvironment, CharProducer> emptyContent
+        = new Function<PluginEnvironment, CharProducer>() {
+          public CharProducer apply(PluginEnvironment env) {
+            return CharProducer.Factory.fromString("", start);
+          }
+        };
+    return new EmbeddedContent(
+        this, start, emptyContent, null, false, el, null);
+  }
+
+  /**
+   * Builds an external reference from the URI in the named attribute.
+   * @return null if the attribute is missing or empty, or if its value is
+   *     not a well-formed URI, in which case an error is also queued.
+   */
+  private ExternalReference externalReferenceFromAttr(
+      Element el, AttribKey a) {
+    Attr attr = el.getAttributeNodeNS(a.ns.uri, a.localName);
+    if (attr == null) { return null; }
+    if ("".equals(attr.getValue())) { return null; }
+    try {
+      URI uri = new URI(attr.getNodeValue());
+      return new ExternalReference(
+          uri, Nodes.getFilePositionForValue(attr));
+    } catch (URISyntaxException ex) {
+      Message malformed = new Message(
+          PluginMessageType.MALFORMED_URL, MessageLevel.ERROR,
+          Nodes.getFilePositionFor(attr), a);
+      mq.getMessages().add(malformed);
+      return null;
+    }
+  }
+
+ /**
+ * A CharProducer that produces characters from the concatenation of all
+ * the text nodes that are direct children of the given element.
+ * An element with no text children yields an empty producer positioned
+ * at the end of the element.
+ * @param stripComments if true, a leading {@code <!--} in the first text
+ * node and a trailing {@code -->} in the last text node are replaced
+ * with whitespace.
+ */
+ private static CharProducer textNodesToCharProducer(
+ Element el, boolean stripComments) {
+ List<Text> textNodes = Lists.newArrayList();
+ for (Node node : Nodes.childrenOf(el)) {
+ if (node instanceof Text) { textNodes.add((Text) node); }
+ }
+ if (textNodes.isEmpty()) {
+ return CharProducer.Factory.create(
+ new StringReader(""),
+ FilePosition.endOf(Nodes.getFilePositionFor(el)));
+ }
+ List<CharProducer> content = Lists.newArrayList();
+ for (int i = 0, n = textNodes.size(); i < n; ++i) {
+ Text node = textNodes.get(i);
+ String text = node.getNodeValue();
+ if (stripComments) {
+ if (i == 0) { // only the first text node can hold the opening marker
+ text = text.replaceFirst("^(\\s*)<!--", "$1 "); // NOTE(review): replacement width may have been altered by whitespace collapsing in this mail; confirm against the repository
+ }
+ if (i + 1 == n) { // only the last text node can hold the closing marker
+ text = text.replaceFirst("-->(\\s*)$", " $1");
+ }
+ }
+ content.add(CharProducer.Factory.create(
+ new StringReader(text),
+ FilePosition.startOf(Nodes.getFilePositionFor(node))));
+ }
+ if (content.size() == 1) { // a single producer needs no chaining
+ return content.get(0);
+ } else {
+ return CharProducer.Factory.chain(content.toArray(new
CharProducer[0]));
+ }
+ }
+
+
+  /**
+   * The value of the element's {@code type} attribute if it has one,
+   * otherwise the schema's default value for that attribute on this kind
+   * of element, or null if the schema does not know the attribute.
+   */
+  private String getMimeTypeFromHtmlTypeAttribute(
+      Element el, ElKey elKey) {
+    Attr explicit = el.getAttributeNodeNS(TYPE.ns.uri, TYPE.localName);
+    if (explicit != null) { return explicit.getValue(); }
+    HTML.Attribute info = schema.lookupAttribute(TYPE.onElement(elKey));
+    return info != null ? info.getDefaultValue() : null;
+  }
+
+ /**
+ * May be overridden to affect JS {@link EmbeddedContent#parse parsing}.
+ * The result is passed to both the JS lexer and the JS parser.
+ * @return false by default.
+ */
+ protected boolean shouldAllowJsQuasis() { return false; }
+ /**
+ * May be overridden to affect JS {@link EmbeddedContent#parse parsing}.
+ * The result is passed to the JS parser's recover-from-failure setting.
+ * @return false by default.
+ */
+ protected boolean shouldJsRecover() { return false; }
+ /**
+ * May be overridden to affect CSS {@link EmbeddedContent#parse parsing}.
+ * The result is passed on when the CSS token queue is created.
+ * @return false by default.
+ */
+ protected boolean shouldAllowCssSubsts() { return false; }
+ /**
+ * May be overridden to affect CSS {@link EmbeddedContent#parse parsing}.
+ * The result is passed to the CSS parser's constructor.
+ * @return {@code MessageLevel.WARNING} by default.
+ */
+ protected MessageLevel getCssTolerance() { return MessageLevel.WARNING; }
+
+  /**
+   * Creates a JS parser over {@code cp}, configured by
+   * {@link #shouldAllowJsQuasis} and {@link #shouldJsRecover}, whose input
+   * range is the span from the producer's offset to its limit.
+   */
+  Parser makeJsParser(CharProducer cp, MessageQueue mq) {
+    boolean allowQuasis = shouldAllowJsQuasis();
+    FilePosition range = cp.filePositionForOffsets(
+        cp.getOffset(), cp.getLimit());
+    JsTokenQueue tokens = new JsTokenQueue(
+        new JsLexer(cp, allowQuasis), range.source());
+    tokens.setInputRange(range);
+    Parser jsParser = new Parser(tokens, mq, allowQuasis);
+    jsParser.setRecoverFromFailure(shouldJsRecover());
+    return jsParser;
+  }
+
+  /**
+   * Creates a CSS parser over {@code cp}, configured by
+   * {@link #shouldAllowCssSubsts} and {@link #getCssTolerance}, whose
+   * input range is the span from the producer's offset to its limit.
+   */
+  CssParser makeCssParser(CharProducer cp, MessageQueue mq) {
+    boolean allowSubsts = shouldAllowCssSubsts();
+    // The range is computed before the token queue is built over cp.
+    FilePosition range = cp.filePositionForOffsets(
+        cp.getOffset(), cp.getLimit());
+    TokenQueue<CssTokenType> tokens
+        = CssParser.makeTokenQueue(cp, mq, allowSubsts);
+    tokens.setInputRange(range);
+    return new CssParser(tokens, mq, getCssTolerance());
+  }
+
+  /**
+   * Replaces every non-whitespace character up to and including the first
+   * ':' with a space, so the result has the same length as {@code s}.
+   * A string without a ':' is returned with its content unchanged.
+   */
+  private static String blankOutScheme(String s) {
+    int colon = s.indexOf(':');
+    char[] chars = s.toCharArray();
+    for (int i = 0; i <= colon; ++i) {
+      if (!Character.isWhitespace(chars[i])) { chars[i] = ' '; }
+    }
+    return new String(chars);
+  }
+}
=======================================
--- /dev/null
+++ /trunk/src/com/google/caja/plugin/stages/ResolveUriStage.java Wed Dec
23 23:20:46 2009
@@ -0,0 +1,140 @@
+// Copyright (C) 2009 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.caja.plugin.stages;
+
+import com.google.caja.lang.html.HTML;
+import com.google.caja.lang.html.HtmlSchema;
+import com.google.caja.lexer.FilePosition;
+import com.google.caja.lexer.InputSource;
+import com.google.caja.lexer.escaping.UriUtil;
+import com.google.caja.parser.html.AttribKey;
+import com.google.caja.parser.html.ElKey;
+import com.google.caja.parser.html.Nodes;
+import com.google.caja.plugin.Dom;
+import com.google.caja.plugin.Job;
+import com.google.caja.plugin.Jobs;
+import com.google.caja.util.Pipeline;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import org.w3c.dom.Attr;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+
+/**
+ * For each HTML job, tries to determine a base URI. If it can, it will
+ * resolve well-formed relative URIs in attributes.
+ *
+ * @author mikes...@gmail.com
+ */
+public class ResolveUriStage implements Pipeline.Stage<Jobs> {
+ private static final ElKey BASE = ElKey.forHtmlElement("base");
+ private static final AttribKey BASE_HREF
+ = AttribKey.forHtmlAttrib(BASE, "href");
+
+ private final HtmlSchema schema;
+
+  /**
+   * @param schema used to decide which attributes have URI values.
+   */
+  public ResolveUriStage(HtmlSchema schema) {
+    this.schema = schema;
+  }
+
+  /**
+   * The base URI for the document rooted at {@code root}: the first usable
+   * {@code <base href>} if there is one, otherwise the URI of the input
+   * source of {@code pos} when that source is known.
+   * @return null unless the candidate is absolute and hierarchical.
+   */
+  private URI baseUri(Node root, FilePosition pos) {
+    URI candidate = baseUriForDoc(root);
+    // TODO(mikesamuel): this is problematic for DOM nodes parsed without
+    // proper debugging info.
+    if (candidate == null && !InputSource.UNKNOWN.equals(pos.source())) {
+      candidate = pos.source().getUri();
+    }
+    if (candidate == null) { return null; }
+    boolean usable = candidate.isAbsolute() && !candidate.isOpaque();
+    return usable ? candidate : null;
+  }
+
+  /**
+   * Finds the first {@code <base>} element at or under {@code root} that
+   * has a usable href.  When {@code root} is not an element, each child is
+   * searched in turn.
+   * @return null if there is no such element.
+   */
+  private URI baseUriForDoc(Node root) {
+    if (!(root instanceof Element)) {
+      for (Node child : Nodes.childrenOf(root)) {
+        URI fromChild = baseUriForDoc(child);
+        if (fromChild != null) { return fromChild; }
+      }
+      return null;
+    }
+    Element el = (Element) root;
+    // A root that is itself a <base> is the only candidate.
+    if (BASE.is(el)) { return uriFromBaseElement(el); }
+    for (Element base : Nodes.nodeListIterable(
+             el.getElementsByTagNameNS(BASE.ns.uri, BASE.localName),
+             Element.class)) {
+      URI fromBase = uriFromBaseElement(base);
+      if (fromBase != null) { return fromBase; }
+    }
+    return null;
+  }
+
+  /**
+   * The href of a {@code <base>} element.
+   * @return null if the attribute is missing, is not a well-formed URI,
+   *     or is not both absolute and hierarchical.
+   */
+  private URI uriFromBaseElement(Element base) {
+    Attr href = base.getAttributeNodeNS(
+        BASE_HREF.ns.uri, BASE_HREF.localName);
+    if (href == null) { return null; }
+    String value = href.getValue();
+    URI uri;
+    try {
+      uri = new URI(value);
+    } catch (URISyntaxException ex) {
+      return null;
+    }
+    if (uri.isAbsolute() && !uri.isOpaque()) { return uri; }
+    return null;
+  }
+
+  /**
+   * Resolves relative URIs in every HTML job for which a base URI can be
+   * determined.  Jobs without a base URI are left untouched.
+   * @return true always.
+   */
+  public boolean apply(Jobs jobs) {
+    for (Job job : jobs.getJobsByType(Job.JobType.HTML)) {
+      Dom dom = job.getRoot().cast(Dom.class).node;
+      Node root = dom.getValue();
+      URI baseUri = baseUri(root, dom.getFilePosition());
+      if (baseUri == null) { continue; }
+      resolveRelativeUrls(root, baseUri);
+    }
+    return true;
+  }
+
+  /**
+   * Rewrites, in place, the URI-valued attributes of every element in the
+   * subtree rooted at {@code n} so that they are absolute.  The file
+   * position recorded for each rewritten value is preserved.
+   * <p>
+   * {@code <base href>} attributes, fragment-only references, and values
+   * that do not resolve to an absolute URI are left as they are.
+   *
+   * @param base the URI against which attribute values are resolved.
+   */
+  private void resolveRelativeUrls(Node n, URI base) {
+    if (n instanceof Element) {
+      Element el = (Element) n;
+      ElKey elKey = ElKey.forElement(el);
+      for (Attr a : Nodes.attributesOf(el)) {
+        AttribKey aKey = AttribKey.forAttribute(elKey, a);
+        // If we ignored a relative base href, don't make it valid based
+        // on a later one.
+        if (BASE_HREF.equals(aKey)) { continue; }
+        HTML.Attribute attrInfo = schema.lookupAttribute(aKey);
+        if (attrInfo == null
+            || attrInfo.getType() != HTML.Attribute.Type.URI) {
+          continue;
+        }
+        String value = a.getValue();
+        // Don't muck with intra-document references.
+        if (value.startsWith("#")) { continue; }
+        URI uri = UriUtil.resolve(base, value);
+        if (uri != null && uri.isAbsolute()) {
+          FilePosition valuePos = Nodes.getFilePositionForValue(a);
+          // uri is already absolute, so resolving it against base again
+          // would hand back the same URI.
+          a.setValue(uri.toString());
+          Nodes.setFilePositionForValue(a, valuePos);
+        }
+      }
+    }
+    for (Node c = n.getFirstChild(); c != null; c = c.getNextSibling()) {
+      resolveRelativeUrls(c, base);
+    }
+  }
+}
=======================================
--- /dev/null
+++ /trunk/src/com/google/caja/util/ContentType.java Wed Dec 23 23:20:46
2009
@@ -0,0 +1,86 @@
+// Copyright (C) 2009 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.caja.util;
+
+import com.google.caja.util.Maps;
+import com.google.caja.util.Strings;
+
+import java.util.Map;
+
+/**
+ * A file type, pairing a canonical MIME type with a file extension.
+ *
+ * @author mikes...@gmail.com
+ */
+public enum ContentType {
+  CSS("text/css", "css", true),
+  JS("text/javascript", "js", true),
+  JSON("application/json", "json", true),
+  HTML("text/html", "html", true),
+  XML("application/xhtml+xml", "xhtml", true),
+  ZIP("application/zip", "zip", false),
+  ;
+
+  /** The canonical MIME type for this kind of content. */
+  public final String mimeType;
+  /** The file extension, without a leading dot. */
+  public final String ext;
+  /** True if the content is textual rather than binary. */
+  public final boolean isText;
+
+  ContentType(String contentType, String ext, boolean isText) {
+    this.mimeType = contentType;
+    this.ext = ext;
+    this.isText = isText;
+  }
+
+  /** Maps lower-case MIME types, including common aliases, to types. */
+  private static final Map<String, ContentType> MIME_TYPES
+      = Maps.<String, ContentType>immutableMap()
+      // From http://krijnhoetmer.nl/stuff/javascript/mime-types/ and others.
+      .put("text/javascript", ContentType.JS)
+      .put("application/x-javascript", ContentType.JS)
+      .put("application/javascript", ContentType.JS)
+      .put("text/ecmascript", ContentType.JS)
+      .put("application/ecmascript", ContentType.JS)
+      .put("text/jscript", ContentType.JS)
+      .put("text/css", ContentType.CSS)
+      .put("text/html", ContentType.HTML)
+      .put("application/xml", ContentType.XML)
+      .put("application/xhtml+xml", ContentType.XML)
+      .put("application/x-winzip", ContentType.ZIP)
+      .put("application/zip", ContentType.ZIP)
+      .put("application/x-java-archive", ContentType.ZIP)
+      .create();
+
+  /**
+   * Looks up the content type registered for a MIME type.
+   * Case, surrounding whitespace, and any parameters following the first
+   * {@code ;} are ignored, so {@code "Text/HTML ; charset=UTF-8"} matches
+   * {@code text/html}.
+   *
+   * @param mimeType a MIME type, possibly with parameters.  May be null.
+   * @return the matching type, or null if {@code mimeType} is null or is
+   *     not a key of {@code MIME_TYPES}.
+   */
+  public static ContentType fromMimeType(String mimeType) {
+    if (mimeType == null) { return null; }
+    int semi = mimeType.indexOf(';');
+    // Trim so that whitespace before the parameter separator, as in
+    // "text/html ; charset=UTF-8", does not defeat the lookup.
+    String baseType = semi >= 0 ? mimeType.substring(0, semi) : mimeType;
+    return MIME_TYPES.get(Strings.toLowerCase(baseType.trim()));
+  }
+}
=======================================
--- /dev/null
+++ /trunk/tests/com/google/caja/plugin/stages/ResolveUriStageTest.java Wed
Dec 23 23:20:46 2009
@@ -0,0 +1,109 @@
+// Copyright (C) 2009 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.caja.plugin.stages;
+
+import com.google.caja.lang.html.HtmlSchema;
+import com.google.caja.plugin.Job;
+import com.google.caja.plugin.Jobs;
+
+/**
+ * Checks that {@link ResolveUriStage} rewrites URI attributes in HTML jobs.
+ * Documents without a {@code <base href>} are expected to resolve against
+ * {@code test:/}, presumably the URI of the input source that
+ * {@link PipelineStageTestCase} assigns to each job.
+ */
+public class ResolveUriStageTest extends PipelineStageTestCase {
+
+  public final void testEmptyDoc() throws Exception {
+    assertPipeline(
+        job("", Job.JobType.HTML),
+        job("", Job.JobType.HTML));
+  }
+
+  public final void testLink() throws Exception {
+    assertPipeline(
+        job("<a href=foo.html>foo</a>", Job.JobType.HTML),
+        job("<a href=\"test:/foo.html\">foo</a>", Job.JobType.HTML));
+  }
+
+  /** A fragment-only reference is left relative. */
+  public final void testAnchorOnly() throws Exception {
+    assertPipeline(
+        job("<a href=#bar>foo</a>", Job.JobType.HTML),
+        job("<a href=\"#bar\">foo</a>", Job.JobType.HTML));
+  }
+
+  public final void testLinkWithAnchor() throws Exception {
+    assertPipeline(
+        job("<a href=foo.html#bar>foo</a>", Job.JobType.HTML),
+        job("<a href=\"test:/foo.html#bar\">foo</a>",
+            Job.JobType.HTML));
+  }
+
+  /** An absolute {@code <base href>} takes precedence as the base URI. */
+  public final void testLinkWithBase() throws Exception {
+    assertPipeline(
+        job("<base href=http://example.org/bar/baz/foo.html>"
+            + "<a href=../boo.html>foo</a>",
+            Job.JobType.HTML),
+        job("<base href=\"http://example.org/bar/baz/foo.html\" />"
+            + "<a href=\"http://example.org/bar/boo.html\">foo</a>",
+            Job.JobType.HTML));
+  }
+
+  /** A path that climbs above the root of the base is left unresolved. */
+  public final void testUnresolvableUrl() throws Exception {
+    assertPipeline(
+        job("<base href=http://example.org/bar/baz/foo.html>"
+            + "<a href=../../../../boo.html>foo</a>",
+            Job.JobType.HTML),
+        job("<base href=\"http://example.org/bar/baz/foo.html\" />"
+            + "<a href=\"../../../../boo.html\">foo</a>",
+            Job.JobType.HTML));
+  }
+
+  /** Characters that are illegal in a URI are escaped before resolution. */
+  public final void testMalformedUrl() throws Exception {
+    assertPipeline(
+        job("<a href='foo bar'>foo</a>",
+            Job.JobType.HTML),
+        job("<a href=\"test:/foo%20bar\">foo</a>",
+            Job.JobType.HTML));
+  }
+
+  /** Opaque URIs are normalized but not resolved against the base. */
+  public final void testOpaqueUrl() throws Exception {
+    assertPipeline(
+        job("<a href=mailto:bob@example.com>foo</a>",
+            Job.JobType.HTML),
+        job("<a href=\"mailto:bob%40example.com\">foo</a>",
+            Job.JobType.HTML));
+  }
+
+  public final void testJavascriptUrl() throws Exception {
+    assertPipeline(
+        job("<a href='javascript:foo() + bar([1, 2, 3]) * 4'>foo</a>",
+            Job.JobType.HTML),
+        job("<a href=\"javascript:"
+            + "foo%28%29%20+%20bar%28%5B1,%202,%203%5D%29%20%2A%204\">foo</a>",
+            Job.JobType.HTML));
+  }
+
+  /** Runs only the stage under test over {@code jobs}. */
+  @Override
+  protected boolean runPipeline(Jobs jobs) throws Exception {
+    return new ResolveUriStage(HtmlSchema.getDefault(mq)).apply(jobs);
+  }
+}
=======================================
--- /trunk/src/com/google/caja/ancillary/servlet/ContentType.java Mon Nov
30 20:44:55 2009
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright (C) 2009 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package com.google.caja.ancillary.servlet;
-
-import com.google.caja.lexer.CharProducer;
-import com.google.caja.lexer.CssLexer;
-import com.google.caja.lexer.CssTokenType;
-import com.google.caja.lexer.FilePosition;
-import com.google.caja.lexer.ParseException;
-import com.google.caja.lexer.Token;
-import com.google.caja.reporting.DevNullMessageQueue;
-import com.google.caja.util.Maps;
-import com.google.caja.util.Strings;
-
-import java.util.Map;
-
-/**
- * A file type.
- *
- * @author mikes...@gmail.com
- */
-enum ContentType {
- CSS("text/css", "css", true),
- JS("text/javascript", "js", true),
- JSON("application/json", "json", true),
- HTML("text/html", "html", true),
- XML("application/xhtml+xml", "xhtml", true),
- ZIP("application/zip", "zip", false),
- ;
-
- final String mimeType;
- final String ext;
- final boolean isText;
-
- ContentType(String contentType, String ext, boolean isText) {
- this.mimeType = contentType;
- this.ext = ext;
- this.isText = isText;
- }
-
- private static final Map<String, ContentType> MIME_TYPES
- = Maps.<String, ContentType>immutableMap()
- // From http://krijnhoetmer.nl/stuff/javascript/mime-types/ and others.
- .put("text/javascript", ContentType.JS)
- .put("application/x-javascript", ContentType.JS)
- .put("application/javascript", ContentType.JS)
- .put("text/ecmascript", ContentType.JS)
- .put("application/ecmascript", ContentType.JS)
- .put("text/jscript", ContentType.JS)
- .put("text/css", ContentType.CSS)
- .put("text/html", ContentType.HTML)
- .put("application/xml", ContentType.XML)
- .put("application/xhtml+xml", ContentType.XML)
- .put("application/x-winzip", ContentType.ZIP)
- .put("application/zip", ContentType.ZIP)
- .put("application/x-java-archive", ContentType.ZIP)
- .create();
-
- static ContentType guess(String mimeType, String path, CharSequence
code) {
- ContentType contentType = null;
- if (mimeType != null) {
- int semi = mimeType.indexOf(';');
- String baseType = semi < 0 ? mimeType : mimeType.substring(0, semi);
- contentType = MIME_TYPES.get(Strings.toLowerCase(baseType).trim());
- }
- if (contentType == null && path != null) {
- int dot = path.lastIndexOf('.');
- if (dot >= 0) {
- String ext = path.substring(dot + 1);
- for (ContentType candidate : ContentType.values()) {
- if (ext.equals(candidate.ext)) {
- contentType = candidate;
- break;
- }
- }
- }
- }
- if (contentType == null && code != null) {
- char ch = '\0';
- for (int i = 0, n = code.length(); i < n; ++i) {
- ch = code.charAt(i);
- if (!Character.isWhitespace(ch)) { break; }
- }
- switch (ch) {
- case '<':
- contentType = ContentType.HTML;
- break;
- case '@': case '.': case '#':
- contentType = ContentType.CSS;
- break;
- }
- }
- if (contentType == null && code != null) {
- // Try and lex and see what happens.
- CharProducer cp = CharProducer.Factory.fromString(
- code, FilePosition.UNKNOWN);
- try {
- CssLexer cssLexer = new CssLexer(
- cp, DevNullMessageQueue.singleton(), false);
- contentType = ContentType.CSS;
- while (cssLexer.hasNext()) {
- Token<CssTokenType> t = cssLexer.next();
- if ("if".equals(t.text) || "while".equals(t.text)
- || "for".equals(t.text) || "return".equals(t.text)) {
- contentType = ContentType.JS;
- break;
- }
- }
- } catch (ParseException ex) {
- contentType = ContentType.JS;
- }
- }
- return contentType;
- }
-}
=======================================
--- /trunk/src/com/google/caja/ancillary/servlet/CajaWebToolsServlet.java
Thu Dec 10 14:39:38 2009
+++ /trunk/src/com/google/caja/ancillary/servlet/CajaWebToolsServlet.java
Wed Dec 23 23:20:46 2009
@@ -24,6 +24,7 @@
import com.google.caja.reporting.MessageLevel;
import com.google.caja.reporting.MessageQueue;
import com.google.caja.reporting.SimpleMessageQueue;
+import com.google.caja.util.ContentType;
import com.google.caja.util.Lists;
import com.google.caja.util.Pair;

@@ -229,8 +230,9 @@
CharProducer cp = CharProducer.Factory.fromString(input.code, is);
req.srcMap.put(is, cp.clone());
req.mc.addInputSource(is);
+ URI baseUri = req.baseUri != null ? req.baseUri : is.getUri();
try {
- inputJobs.add(p.parse(cp, input.t, null));
+ inputJobs.add(p.parse(cp, input.t, null, baseUri));
} catch (ParseException ex) {
ex.toMessageQueue(mq);
}
@@ -242,9 +244,11 @@
// Take the inputs and generate output jobs.
List<Job> jobs;
if (req.verb == Verb.INDEX) {
- jobs = Collections.singletonList(Job.html(IndexPage.render(req)));
+ jobs = Collections.singletonList(
+ Job.html(IndexPage.render(req), null));
} else if (req.verb == Verb.HELP) {
- jobs =
Collections.singletonList(Job.html(HelpPage.render(staticFiles)));
+ jobs = Collections.singletonList(
+ Job.html(HelpPage.render(staticFiles), null));
} else {
try {
jobs = p.process(inputJobs);
=======================================
--- /trunk/src/com/google/caja/ancillary/servlet/Content.java Mon Nov 30
20:44:55 2009
+++ /trunk/src/com/google/caja/ancillary/servlet/Content.java Wed Dec 23
23:20:46 2009
@@ -14,6 +14,8 @@

package com.google.caja.ancillary.servlet;

+import com.google.caja.util.ContentType;
+
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
=======================================
--- /trunk/src/com/google/caja/ancillary/servlet/IndexPage.java Mon Nov 30
20:44:55 2009
+++ /trunk/src/com/google/caja/ancillary/servlet/IndexPage.java Wed Dec 23
23:20:46 2009
@@ -17,6 +17,7 @@
import com.google.caja.parser.html.DomParser;
import com.google.caja.parser.html.HtmlQuasiBuilder;
import com.google.caja.reporting.MessageLevel;
+import com.google.caja.util.ContentType;
import com.google.caja.util.Join;
import com.google.caja.util.Lists;
import com.google.caja.util.Sets;
=======================================
--- /trunk/src/com/google/caja/ancillary/servlet/Input.java Mon Nov 30
20:44:55 2009
+++ /trunk/src/com/google/caja/ancillary/servlet/Input.java Wed Dec 23
23:20:46 2009
@@ -14,6 +14,8 @@

package com.google.caja.ancillary.servlet;

+import com.google.caja.util.ContentType;
+
/**
* Encapsulates a source file uploaded to the tools servlet.
*
=======================================
--- /trunk/src/com/google/caja/ancillary/servlet/Job.java Mon Nov 30
20:44:55 2009
+++ /trunk/src/com/google/caja/ancillary/servlet/Job.java Wed Dec 23
23:20:46 2009
@@ -17,6 +17,9 @@
import com.google.caja.parser.css.CssTree;
import com.google.caja.parser.js.Block;
import com.google.caja.parser.js.ObjectConstructor;
+import com.google.caja.util.ContentType;
+
+import java.net.URI;

import org.w3c.dom.Attr;
import org.w3c.dom.DocumentFragment;
@@ -38,35 +41,38 @@
* For JS this might be a {@code script} element or {@code onclick} attribute.
*/
final Node origin;
-
- private Job(ContentType t, Object root, Node origin) {
+ /** Base URI for the job */
+ final URI baseUri;
+
+ private Job(ContentType t, Object root, Node origin, URI baseUri) {
this.t = t;
this.root = root;
this.origin = origin;
+ this.baseUri = baseUri;
}

- static Job js(Block root, Node origin) {
- return new Job(ContentType.JS, root, origin);
+ static Job js(Block root, Node origin, URI baseUri) {
+ return new Job(ContentType.JS, root, origin, baseUri);
}

- static Job json(ObjectConstructor root) {
- return new Job(ContentType.JSON, root, null);
+ static Job json(ObjectConstructor root, URI baseUri) {
+ return new Job(ContentType.JSON, root, null, baseUri);
}

- static Job css(CssTree.StyleSheet css, Element origin) {
- return new Job(ContentType.CSS, css, origin);
+ static Job css(CssTree.StyleSheet css, Element origin, URI baseUri) {
+ return new Job(ContentType.CSS, css, origin, baseUri);
}

- static Job css(CssTree.DeclarationGroup css, Attr origin) {
- return new Job(ContentType.CSS, css, origin);
+ static Job css(CssTree.DeclarationGroup css, Attr origin, URI baseUri) {
+ return new Job(ContentType.CSS, css, origin, baseUri);
}

- static Job html(DocumentFragment fragment) {
- return new Job(ContentType.HTML, fragment, null);
+ static Job html(DocumentFragment fragment, URI baseUri) {
+ return new Job(ContentType.HTML, fragment, null, baseUri);
}

static Job zip(byte[] zipBody) {
- return new Job(ContentType.ZIP, zipBody, null);
+ return new Job(ContentType.ZIP, zipBody, null, null);
}

@Override
=======================================
--- /trunk/src/com/google/caja/ancillary/servlet/Processor.java Mon Nov 30
20:44:55 2009
+++ /trunk/src/com/google/caja/ancillary/servlet/Processor.java Wed Dec 23
23:20:46 2009
@@ -14,6 +14,7 @@

package com.google.caja.ancillary.servlet;

+import com.google.caja.SomethingWidgyHappenedError;
import com.google.caja.ancillary.jsdoc.HtmlRenderer;
import com.google.caja.ancillary.jsdoc.Jsdoc;
import com.google.caja.ancillary.jsdoc.JsdocException;
@@ -41,7 +42,6 @@
import com.google.caja.parser.html.DomParser;
import com.google.caja.parser.html.ElKey;
import com.google.caja.parser.html.HtmlQuasiBuilder;
-import com.google.caja.parser.html.Namespaces;
import com.google.caja.parser.html.Nodes;
import com.google.caja.parser.js.Block;
import com.google.caja.parser.js.Expression;
@@ -49,7 +49,10 @@
import com.google.caja.parser.js.Parser;
import com.google.caja.parser.js.Statement;
import com.google.caja.plugin.CssValidator;
+import com.google.caja.plugin.PluginEnvironment;
import com.google.caja.plugin.PluginMessageType;
+import com.google.caja.plugin.stages.EmbeddedContent;
+import com.google.caja.plugin.stages.HtmlEmbeddedContentFinder;
import com.google.caja.render.Concatenator;
import com.google.caja.render.CssMinimalPrinter;
import com.google.caja.render.CssPrettyPrinter;
@@ -61,12 +64,15 @@
import com.google.caja.reporting.MessageQueue;
import com.google.caja.reporting.MessageTypeInt;
import com.google.caja.reporting.RenderContext;
+import com.google.caja.util.ContentType;
import com.google.caja.util.Lists;
import com.google.caja.util.Sets;
import com.google.caja.util.Strings;

import java.io.File;
import java.io.IOException;
+import java.net.URI;
+
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -122,7 +128,7 @@
}
break;
case LINT:
- output.add(Job.html(LintPage.render(reduce(jobs), req, mq)));
+ output.add(Job.html(LintPage.render(reduce(jobs), req, mq), null));
break;
default:
for (Job job : jobs) {
@@ -248,7 +254,7 @@
}

/** Parse a job from input parameters. */
- Job parse(CharProducer cp, ContentType contentType, Node src)
+ Job parse(CharProducer cp, ContentType contentType, Node src, URI
baseUri)
throws ParseException {
FilePosition inputRange = cp.filePositionForOffsets(
cp.getOffset(), cp.getLimit());
@@ -273,7 +279,7 @@
Element el = p.parseDocument();
DocumentFragment f =
el.getOwnerDocument().createDocumentFragment();
f.appendChild(el);
- return Job.html(f);
+ return Job.html(f, baseUri);
}
} else {
lexer.setTreatedAsXml(contentType == ContentType.XML);
@@ -282,19 +288,20 @@
tq.setInputRange(inputRange);
p = new DomParser(tq, contentType == ContentType.XML, mq);
}
- return Job.html(p.parseFragment());
+ return Job.html(p.parseFragment(), baseUri);
}
case JS: {
JsLexer lexer = new JsLexer(cp);
JsTokenQueue tq = new JsTokenQueue(lexer, is);
if (tq.isEmpty()) {
return Job.js(
- new Block(inputRange, Collections.<Statement>emptyList()),
src);
+ new Block(inputRange, Collections.<Statement>emptyList()),
src,
+ baseUri);
}
tq.setInputRange(inputRange);
Block program = new Parser(tq, mq, false).parse();
tq.expectEmpty();
- return Job.js(program, src);
+ return Job.js(program, src, baseUri);
}
case JSON: { // TODO: use a JSON only lexer.
JsLexer lexer = new JsLexer(cp);
@@ -305,7 +312,7 @@
tq.setInputRange(inputRange);
Expression e = new Parser(tq, mq, false).parseExpressionPart(true);
tq.expectEmpty();
- return Job.json((ObjectConstructor) e);
+ return Job.json((ObjectConstructor) e, baseUri);
}
case CSS: {
TokenQueue<CssTokenType> tq = CssParser.makeTokenQueue(cp, mq,
false);
@@ -314,10 +321,10 @@
Job job;
if (src instanceof Attr) {
CssTree.DeclarationGroup dg = p.parseDeclarationGroup();
- job = Job.css(dg, (Attr) src);
+ job = Job.css(dg, (Attr) src, baseUri);
} else {
CssTree.StyleSheet ss = p.parseStyleSheet();
- job = Job.css(ss, (Element) src); // src may be null
+ job = Job.css(ss, (Element) src, baseUri); // src may be null
}
tq.expectEmpty();
return job;
@@ -368,90 +375,45 @@
private List<Job> extractJobs(Job job) {
List<Job> all = Lists.newArrayList(job);
if (job.t == ContentType.XML || job.t == ContentType.HTML) {
- extractJobs((Node) job.root, all);
+ extractJobs((Node) job.root, job.baseUri, all);
}
return all;
}

- private void extractJobs(Node node, List<Job> out) {
- if (node instanceof Element) {
- Element el = (Element) node;
- ElKey elKey = ElKey.forElement(el);
- if (Namespaces.isHtml(elKey.ns.uri)
- && ("script".equals(elKey.localName)
- || "style".equals(elKey.localName))) {
- String mimeType = el.getAttributeNS(elKey.ns.uri, "type");
- boolean isScript = "script".equals(elKey.localName);
- if (!isScript || !el.hasAttribute("src")) {
- if ("".equals(mimeType)) {
- mimeType = isScript ? "text/javascript" : "text/css";
- }
- CharProducer cp = cdataProducer(node);
- try {
- out.add(parse(cp, ContentType.guess(
- mimeType, null, cp.clone()), el));
- } catch (ParseException ex) {
- ex.toMessageQueue(mq);
- }
- }
- }
- for (Attr a : Nodes.attributesOf(el)) {
- AttribKey aKey = AttribKey.forAttribute(elKey, a);
- HTML.Attribute aInfo = req.htmlSchema.lookupAttribute(aKey);
- if (aInfo != null) {
- HTML.Attribute.Type aType = aInfo.getType();
- switch (aType) {
- case SCRIPT:
- case STYLE:
- if (!"".equals(a.getValue().trim())) {
- CharProducer cp = attribProducer(a);
- ContentType ct = aType == HTML.Attribute.Type.SCRIPT
- ? ContentType.JS : ContentType.CSS;
- try {
- out.add(parse(cp, ct, a));
- } catch (ParseException ex) {
- ex.toMessageQueue(mq);
- }
- }
- break;
- default: break;
- }
+ private void extractJobs(Node node, URI baseUri, List<Job> out) {
+ HtmlEmbeddedContentFinder f = new HtmlEmbeddedContentFinder(
+ req.htmlSchema, req.baseUri, mq, req.mc);
+ PluginEnvironment env = null;
+ for (EmbeddedContent c : f.findEmbeddedContent(node)) {
+ if (c.getType() != null) {
+ Node src = c.getSource();
+ ParseTreeNode t;
+ try {
+ t = c.parse(env, mq);
+ } catch (ParseException ex) {
+ ex.toMessageQueue(mq);
+ continue;
+ }
+ switch (c.getType()) {
+ case JS:
+ if (src instanceof Element) {
+ out.add(Job.js((Block) t, (Element) src, baseUri));
+ } else {
+ out.add(Job.js((Block) t, (Attr) src, baseUri));
+ }
+ break;
+ case CSS:
+ if (src instanceof Element) {
+ out.add(Job.css((CssTree.StyleSheet) t, (Element) src,
baseUri));
+ } else {
+ out.add(Job.css(
+ (CssTree.DeclarationGroup) t, (Attr) src, baseUri));
+ }
+ break;
+ default: throw new SomethingWidgyHappenedError();
}
}
}
- for (Node child : Nodes.childrenOf(node)) {
- extractJobs(child, out);
- }
- }
-
- private static CharProducer cdataProducer(Node node) {
- List<CharProducer> cps = Lists.newArrayList();
- for (Node child : Nodes.childrenOf(node)) {
- switch (child.getNodeType()) {
- case Node.TEXT_NODE:
- case Node.CDATA_SECTION_NODE:
- cps.add(
- CharProducer.Factory.fromString(
- child.getNodeValue(),
- Nodes.getFilePositionFor(child)));
- break;
- }
- }
- return CharProducer.Factory.chain(cps.toArray(new CharProducer[0]));
- }
-
- private CharProducer attribProducer(Attr a) {
- String rawText = Nodes.getRawValue(a);
- FilePosition pos = Nodes.getFilePositionForValue(a);
- int rawTextLen = rawText.length();
- if (rawTextLen >= 2) {
- char ch = rawText.charAt(0);
- if ((ch == '"' || ch == '\'') && ch == rawText.charAt(rawTextLen -
1)) {
- rawText = " " + rawText.substring(1, rawTextLen - 1) + " ";
- }
- }
- return CharProducer.Factory.fromHtmlAttribute(
- CharProducer.Factory.fromString(rawText, pos));
}

/** Find problems in code. */
@@ -551,7 +513,7 @@
optimized = new Block(
optimized.getFilePosition(),
Collections.singletonList(optimized));
}
- return Job.js((Block) optimized, job.origin);
+ return Job.js((Block) optimized, job.origin, job.baseUri);
}

private Job optimizeHtml(Job job) {
@@ -602,7 +564,7 @@
}
ObjectConstructor json = jsdoc.extract();
if (req.otype == ContentType.JSON) {
- return Job.json(json);
+ return Job.json(json, null);
} else {
ZipFileSystem fs = new ZipFileSystem("/jsdoc");
StringBuilder jsonSb = new StringBuilder();
=======================================
--- /trunk/src/com/google/caja/ancillary/servlet/Request.java Thu Dec 3
17:21:42 2009
+++ /trunk/src/com/google/caja/ancillary/servlet/Request.java Wed Dec 23
23:20:46 2009
@@ -21,6 +21,7 @@
import com.google.caja.reporting.EchoingMessageQueue;
import com.google.caja.reporting.MessageContext;
import com.google.caja.reporting.MessageLevel;
+import com.google.caja.util.ContentType;
import com.google.caja.util.Lists;
import com.google.caja.util.Maps;
import com.google.caja.util.Multimap;
@@ -166,7 +167,7 @@
int semi = mimeType.indexOf(';');
if (semi >= 0) { mimeType = mimeType.substring(0, semi); }
}
- ContentType ct = ContentType.guess(mimeType, path, val);
+ ContentType ct = GuessContentType.guess(mimeType, path, val);
if (path == null || "".equals(path)) {
path = "unnamed-" + c.inputs.size() + "." + ct.ext;
}
=======================================
--- /trunk/src/com/google/caja/ancillary/servlet/StaticFiles.java Thu Dec
3 17:21:42 2009
+++ /trunk/src/com/google/caja/ancillary/servlet/StaticFiles.java Wed Dec
23 23:20:46 2009
@@ -18,6 +18,7 @@
import com.google.caja.lexer.InputSource;
import com.google.caja.lexer.ParseException;
import com.google.caja.reporting.DevNullMessageQueue;
+import com.google.caja.util.ContentType;
import com.google.caja.util.Lists;

import java.io.ByteArrayOutputStream;
@@ -92,7 +93,7 @@
try {
// TODO(mikesamuel): SVN has it in svn:mime-type, but that is not
// available via the ClassLoader. Is there any way to get at it?
- ContentType t = ContentType.guess(null, path, null);
+ ContentType t = GuessContentType.guess(null, path, null);
if (t != null && t.isText) {
InputSource is;
try {
@@ -110,7 +111,7 @@
min.otype = t;
Processor p = new Processor(min,
DevNullMessageQueue.singleton());
try {
- Job j = p.parse(cp, t, null);
+ Job j = p.parse(cp, t, null, is.getUri());
List<Job> out = p.process(Lists.newArrayList(j));
if (out.size() == 1) {
content = p.reduce(out);
=======================================
--- /trunk/src/com/google/caja/ancillary/servlet/UriFetcher.java Mon Nov 30
20:44:55 2009
+++ /trunk/src/com/google/caja/ancillary/servlet/UriFetcher.java Wed Dec 23
23:20:46 2009
@@ -14,6 +14,7 @@

package com.google.caja.ancillary.servlet;

+import com.google.caja.util.ContentType;
import com.google.caja.util.Strings;

import java.io.IOException;
@@ -47,7 +48,7 @@
} finally {
in.close();
}
- ContentType t = ContentType.guess(
+ ContentType t = GuessContentType.guess(
conn.getContentType(), uri.getPath(), text);
return new Content(text.toString(), t);
}
=======================================
---
/trunk/src/com/google/caja/demos/playground/client/PlaygroundService.java
Fri Dec 18 12:42:18 2009
+++
/trunk/src/com/google/caja/demos/playground/client/PlaygroundService.java
Wed Dec 23 23:20:46 2009
@@ -5,7 +5,7 @@

/**
* The client side stub for the RPC cajoling service.
- *
+ *
* @author jas...@gmail.com (Jasvir Nagra)
*/
@RemoteServiceRelativePath("cajole")
@@ -16,7 +16,7 @@
public final int HTML = 0;
public final int JAVASCRIPT = 1;
public final int ERRORS = 2;
-
+
/**
* Cajoles input and returns cajoled output and error messages
* @param uri Set input source to uri (used only for error messages)
@@ -29,7 +29,7 @@
*/
// TODO(jasvir): Coax gwt to serialize and return CajolingResult instead
String[] cajole(String uri, String input);
-
+
/**
* Returns build info as a string
*/
@@ -37,7 +37,7 @@

/**
* Fetches the document located at {@code uri} as a string
- * @param url
+ * @param url the URL to fetch.
* @return the document if it exists, null else
*/
// TODO(jasvir): Fetching ought to be done via a separate service
=======================================
--- /trunk/src/com/google/caja/lexer/CharProducer.java Thu Dec 10 17:39:38
2009
+++ /trunk/src/com/google/caja/lexer/CharProducer.java Wed Dec 23 23:20:46
2009
@@ -28,7 +28,8 @@
* @author mikes...@gmail.com
*/
public abstract class CharProducer implements CharSequence {
- private int offset, limit;
+ private int offset;
+ private final int limit;
private final char[] buf;

CharProducer(char[] buf, int limit) {
@@ -250,6 +251,10 @@
}
}, p);
}
+
+ public static CharProducer fromUri(CharProducer p) {
+ return DecodingCharProducer.make(new UriDecoder(), p);
+ }

/**
* A CharProducer that contains the concatenation of the given
character
=======================================
--- /trunk/src/com/google/caja/lexer/escaping/UriUtil.java Fri May 8
16:34:59 2009
+++ /trunk/src/com/google/caja/lexer/escaping/UriUtil.java Wed Dec 23
23:20:46 2009
@@ -15,7 +15,9 @@
package com.google.caja.lexer.escaping;

import com.google.caja.util.Join;
-
+import com.google.caja.util.Strings;
+
+import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
@@ -101,7 +103,7 @@
normalizeAuthority(authority, sb);
}
if (path.length() != 0 || sb.length() != 0) {
- normalizePath(path, sb.length() != 0, sb);
+ normalizePath(path, sb.length() != 0 && !isOpaque(scheme), sb);
}
if (query != null) {
sb.append('?');
@@ -113,6 +115,20 @@
}
return sb.toString();
}
+
+ public static URI resolve(URI base, String relative) {
+ URI abs = base.resolve(normalizeUri(relative));
+ if (!abs.isOpaque()) {
+ String path = abs.getPath();
+ // Work around a bug in java.net.URI.
+ // TODO(mikesamuel): stop using java.net.URI and use a decent URL
+ // implementation instead.
+ if (path != null && (path.startsWith("/../") || path.equals("/..")))
{
+ return null;
+ }
+ }
+ return abs;
+ }

private static void normalizeScheme(String scheme, StringBuilder out) {
// Section 3.1:
@@ -136,6 +152,13 @@
}
out.append(scheme, pos, n);
}
+
+ private static boolean isOpaque(String scheme) {
+ return Strings.equalsIgnoreCase("mailto", scheme)
+ || Strings.equalsIgnoreCase("javascript", scheme)
+ || Strings.equalsIgnoreCase("content", scheme)
+ || Strings.equalsIgnoreCase("data", scheme);
+ }

private static void normalizeAuthority(String authority, StringBuilder
out) {
// Section 3.2:
=======================================
--- /trunk/src/com/google/caja/parser/css/CssParser.java Thu Apr 30
13:05:14 2009
+++ /trunk/src/com/google/caja/parser/css/CssParser.java Wed Dec 23
23:20:46 2009
@@ -30,12 +30,12 @@
import com.google.caja.reporting.MessageType;
import com.google.caja.reporting.MessageTypeInt;
import com.google.caja.util.Criterion;
+import com.google.caja.util.Lists;
import com.google.caja.util.Name;
import com.google.caja.util.Strings;

import java.net.URI;
import java.net.URISyntaxException;
-import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
@@ -210,6 +210,8 @@
this.tolerance = tolerance;
this.isTolerant = isTolerant();
}
+
+ public TokenQueue<CssTokenType> getTokenQueue() { return tq; }

public CssTree.StyleSheet parseStyleSheet() throws ParseException {
// stylesheet
@@ -217,7 +219,7 @@
// [S|CDO|CDC]* [ import [S|CDO|CDC]* ]*
// [ [ ruleset | media | page ] [S|CDO|CDC]* ]*
Mark m = tq.mark();
- List<CssTree.CssStatement> stmts = new
ArrayList<CssTree.CssStatement>();
+ List<CssTree.CssStatement> stmts = Lists.newArrayList();
while (true) {
skipTopLevelIgnorables();
if (!lookaheadSymbol("@import")) { break; }
@@ -235,7 +237,7 @@
public CssTree.DeclarationGroup parseDeclarationGroup()
throws ParseException {
Mark m = tq.mark();
- List<CssTree.Declaration> decls = new ArrayList<CssTree.Declaration>();
+ List<CssTree.Declaration> decls = Lists.newArrayList();
while (!tq.isEmpty()) {
while (tq.lookaheadToken(";")) { tq.advance(); }
if (tq.isEmpty()) { break; }
@@ -261,7 +263,7 @@
}
List<CssTree.Medium> media = Collections.<CssTree.Medium>emptyList();
if (!tq.checkToken(";")) {
- media = new ArrayList<CssTree.Medium>();
+ media = Lists.newArrayList();
do {
CssTree.Medium medium = parseMedium();
if (medium == null) {
@@ -310,7 +312,7 @@
private CssTree.Media parseMedia() throws ParseException {
Mark m = tq.mark();
expectSymbol("@media");
- List<CssTree> children = new ArrayList<CssTree>();
+ List<CssTree> children = Lists.newArrayList();
do {
CssTree.Medium medium = parseMedium();
if (medium == null) {
@@ -343,7 +345,7 @@
ident = unescape(t);
tq.advance();
}
- List<CssTree.PageElement> elements = new
ArrayList<CssTree.PageElement>();
+ List<CssTree.PageElement> elements = Lists.newArrayList();
if (tq.lookaheadToken(":")) {
Mark m2 = tq.mark();
tq.expectToken(":");
@@ -362,7 +364,7 @@
private CssTree.FontFace parseFontFace() throws ParseException {
Mark m = tq.mark();
expectSymbol("@font-face");
- List<CssTree.Declaration> elements = new
ArrayList<CssTree.Declaration>();
+ List<CssTree.Declaration> elements = Lists.newArrayList();
if (parseDeclarationBlock(elements, m)) { return null; }
return new CssTree.FontFace(pos(m), elements);
}
@@ -428,7 +430,7 @@

private CssTree.RuleSet parseRuleSet() throws ParseException {
Mark m = tq.mark();
- List<CssTree> elements = new ArrayList<CssTree>();
+ List<CssTree> elements = Lists.newArrayList();
do {
CssTree.Selector sel = parseSelector();
addIfNotNull(elements, sel);
@@ -443,7 +445,7 @@

private CssTree.Selector parseSelector() throws ParseException {
Mark m = tq.mark();
- List<CssTree> elements = new ArrayList<CssTree>();
+ List<CssTree> elements = Lists.newArrayList();
while (true) {
if (!elements.isEmpty()) {
elements.add(parseCombinator());
@@ -471,7 +473,7 @@

private CssTree.SimpleSelector parseSimpleSelector() throws ParseException {
Mark m = tq.mark();
- List<CssTree> elements = new ArrayList<CssTree>();
+ List<CssTree> elements = Lists.newArrayList();
if (!tq.isEmpty()) {
Token<CssTokenType> t = tq.peek();
if (CssTokenType.IDENT == t.type) {
@@ -606,7 +608,7 @@
SKIP_TO_CHUNK_END_FROM_WITHIN_BLOCK.recover(this, m);
return null;
}
- List<CssTree> children = new ArrayList<CssTree>(3);
+ List<CssTree> children = Lists.newArrayList(3);
children.add(property);
if (expect(":", SKIP_TO_CHUNK_END_FROM_WITHIN_BLOCK, m)) {
return null;
@@ -653,7 +655,7 @@

private CssTree.Expr parseExpr() throws ParseException {
Mark m = tq.mark();
- List<CssTree> children = new ArrayList<CssTree>();
+ List<CssTree> children = Lists.newArrayList();
{
CssTree.Term term = parseTerm();
if (term == null) { return null; }
@@ -837,7 +839,7 @@
if (!tq.checkToken(".")) { return null; }
sb.append('.');
}
- List<ProgIdAttribute> attrs = new ArrayList<ProgIdAttribute>();
+ List<ProgIdAttribute> attrs = Lists.newArrayList();
if (!tq.checkToken(")")) {
do {
CssTree.ProgIdAttribute attr = parseProgIdAttribute();
=======================================
--- /trunk/src/com/google/caja/parser/html/AttribKey.java Fri Nov 13 11:43:08 2009
+++ /trunk/src/com/google/caja/parser/html/AttribKey.java Wed Dec 23 23:20:46 2009
@@ -49,6 +49,12 @@
this.localName = this.ns.uri == Namespaces.HTML_NAMESPACE_URI
? Strings.toLowerCase(localName) : localName;
}
+
+ public boolean is(Attr a) {
+ return ns.uri.equals(a.getNamespaceURI())
+ && localName.equals(a.getLocalName())
+ && el.is(a.getOwnerElement());
+ }

private static final Namespaces HTML_NS = Namespaces.HTML_DEFAULT.forUri(
Namespaces.HTML_NAMESPACE_URI);
=======================================
--- /trunk/src/com/google/caja/parser/html/ElKey.java Fri Nov 13 11:43:08 2009
+++ /trunk/src/com/google/caja/parser/html/ElKey.java Wed Dec 23 23:20:46 2009
@@ -66,6 +66,11 @@
}

public boolean isHtml() { return ns.uri == Namespaces.HTML_NAMESPACE_URI; }
+
+ public boolean is(Element el) {
+ return ns.uri.equals(el.getNamespaceURI())
+ && ("*".equals(localName) || localName.equals(el.getLocalName()));
+ }

public static ElKey forHtmlElement(String localName) {
return new ElKey(HTML_NS, Strings.toLowerCase(localName));
=======================================
--- /trunk/src/com/google/caja/parser/js/Block.java Thu Nov 12 13:05:03 2009
+++ /trunk/src/com/google/caja/parser/js/Block.java Wed Dec 23 23:20:46 2009
@@ -41,7 +41,9 @@
createMutation().appendChildren(elements).execute();
}

- public Block() { super(FilePosition.UNKNOWN, Statement.class); }
+ public Block(FilePosition pos) { super(pos, Statement.class); }
+
+ public Block() { this(FilePosition.UNKNOWN); }

@Override
public List<? extends Statement> children() {
=======================================
--- /trunk/src/com/google/caja/plugin/PluginCompiler.java Fri Dec 18 20:56:18 2009
+++ /trunk/src/com/google/caja/plugin/PluginCompiler.java Wed Dec 23 23:20:46 2009
@@ -28,6 +28,7 @@
import com.google.caja.plugin.stages.InlineCssImportsStage;
import com.google.caja.plugin.stages.LegacyNamespaceFixupStage;
import com.google.caja.plugin.stages.OpenTemplateStage;
+import com.google.caja.plugin.stages.ResolveUriStage;
import com.google.caja.plugin.stages.RewriteCssStage;
import com.google.caja.plugin.stages.RewriteHtmlStage;
import com.google.caja.plugin.stages.SanitizeHtmlStage;
@@ -127,7 +128,8 @@

List<Pipeline.Stage<Jobs>> stages = compilationPipeline.getStages();
stages.add(new LegacyNamespaceFixupStage());
- stages.add(new RewriteHtmlStage());
+ stages.add(new ResolveUriStage(htmlSchema));
+ stages.add(new RewriteHtmlStage(htmlSchema));
stages.add(new InlineCssImportsStage());
stages.add(new SanitizeHtmlStage(htmlSchema));
stages.add(new ValidateCssStage(cssSchema, htmlSchema));
=======================================
--- /trunk/src/com/google/caja/plugin/stages/RewriteHtmlStage.java Fri Nov 13 11:43:08 2009
+++ /trunk/src/com/google/caja/plugin/stages/RewriteHtmlStage.java Wed Dec 23 23:20:46 2009
@@ -14,53 +14,40 @@

package com.google.caja.plugin.stages;

+import com.google.caja.SomethingWidgyHappenedError;
import com.google.caja.lang.css.CssSchema;
-import com.google.caja.lexer.CharProducer;
-import com.google.caja.lexer.CssTokenType;
-import com.google.caja.lexer.ExternalReference;
-import com.google.caja.lexer.FilePosition;
-import com.google.caja.lexer.InputSource;
-import com.google.caja.lexer.JsLexer;
-import com.google.caja.lexer.JsTokenQueue;
+import com.google.caja.lang.html.HtmlSchema;
import com.google.caja.lexer.ParseException;
-import com.google.caja.lexer.TokenQueue;
import com.google.caja.parser.AncestorChain;
import com.google.caja.parser.MutableParseTreeNode;
import com.google.caja.parser.Visitor;
-import com.google.caja.parser.css.CssParser;
import com.google.caja.parser.css.CssTree;
+import com.google.caja.parser.html.AttribKey;
+import com.google.caja.parser.html.ElKey;
import com.google.caja.parser.html.Namespaces;
import com.google.caja.parser.html.Nodes;
import com.google.caja.parser.js.Block;
-import com.google.caja.parser.js.Parser;
-import com.google.caja.parser.js.StringLiteral;
import com.google.caja.plugin.Dom;
import com.google.caja.plugin.ExtractedHtmlContent;
import com.google.caja.plugin.Job;
import com.google.caja.plugin.Jobs;
import com.google.caja.plugin.PluginEnvironment;
import com.google.caja.plugin.PluginMessageType;
-import com.google.caja.reporting.Message;
-import com.google.caja.reporting.MessageLevel;
+import com.google.caja.reporting.MessageContext;
import com.google.caja.reporting.MessagePart;
import com.google.caja.reporting.MessageQueue;
+import com.google.caja.util.Lists;
import com.google.caja.util.Name;
import com.google.caja.util.Pipeline;
-import com.google.caja.util.Strings;
-
-import java.io.StringReader;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.ArrayList;
+import com.google.caja.util.Sets;
+
import java.util.Collections;
-import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
-import org.w3c.dom.Text;

/**
* Extract some unsafe bits from HTML for processing by later stages.
@@ -83,234 +70,128 @@
* @author mikes...@gmail.com
*/
public class RewriteHtmlStage implements Pipeline.Stage<Jobs> {
+ private final HtmlSchema htmlSchema;
+
+ public RewriteHtmlStage(HtmlSchema htmlSchema) {
+ this.htmlSchema = htmlSchema;
+ }

public boolean apply(Jobs jobs) {
+ MessageQueue mq = jobs.getMessageQueue();
+ MessageContext mc = jobs.getMessageContext();
for (Job job : jobs.getJobsByType(Job.JobType.HTML)) {
Node root = ((Dom) job.getRoot().node).getValue();
- rewriteDomTree(root, root, jobs);
- }
- return jobs.hasNoFatalErrors();
- }
-
- private static final String HTML_NS = Namespaces.HTML_NAMESPACE_URI;
-
- void rewriteDomTree(Node root, Node n, Jobs jobs) {
- // Rewrite styles and scripts.
- // <script>foo()</script> -> <script>(cajoled foo)</script>
- // <style>foo { ... }</style> -> <style>foo { ... }</style>
- // <script src=foo.js></script> -> <script>(cajoled, inlined foo)</script>
- // <link rel=stylesheet href=foo.css>
- // -> <style>(cajoled, inlined styles)</style>
- if (n.getNodeType() == Node.ELEMENT_NODE) {
- Element el = (Element) n;
- if (HTML_NS.equals(el.getNamespaceURI())) {
- String name = el.getLocalName();
- if ("script".equals(name)) {
- rewriteScriptTag(root, el, jobs);
- } else if ("style".equals(name)) {
- rewriteStyleTag(el, jobs);
- } else if ("link".equals(name)) {
- rewriteLinkTag(el, jobs);
- } else if ("body".equals(name)) {
- moveOnLoadHandlerToEndOfBody(el);
- }
+ HtmlEmbeddedContentFinder finder = new HtmlEmbeddedContentFinder(
+ htmlSchema, null, mq, mc);
+ for (EmbeddedContent content : finder.findEmbeddedContent(root)) {
+ Node src = content.getSource();
+ if (content.getSource() instanceof Element) {
+ // Rewrite styles and scripts.
+ // <script>foo()</script> -> <script>(cajoled foo)</script>
+ // <style>foo { ... }</style> -> <style>foo { ... }</style>
+ // <script src=foo.js></script>
+ // -> <script>(cajoled, inlined foo)</script>
+ // <link rel=stylesheet href=foo.css>
+ // -> <style>(cajoled, inlined styles)</style>
+ Element el = (Element) content.getSource();
+ if (SCRIPT.is(el)) {
+ rewriteScriptEl(root, content, jobs);
+ } else if (STYLE.is(el)) {
+ rewriteStyleEl(content, jobs);
+ } else if (LINK.is(el)) {
+ rewriteLinkEl(content, jobs);
+ } else {
+ throw new SomethingWidgyHappenedError(src.getNodeName());
+ }
+ } else if (BODY_ONLOAD.is((Attr) src)) {
+ moveOnLoadHandlerToEndOfBody(content, jobs);
+ }
+ // Attribute extraction handled elsewhere.
}
}
- for (Node child : Nodes.childrenOf(n)) {
- rewriteDomTree(root, child, jobs);
- }
+ return jobs.hasNoFatalErrors();
}

- private void rewriteScriptTag(Node root, Element scriptTag, Jobs jobs) {
- Node parent = scriptTag.getParentNode();
+ private static final ElKey BODY = ElKey.forHtmlElement("body");
+ private static final ElKey LINK = ElKey.forHtmlElement("link");
+ private static final ElKey SCRIPT = ElKey.forHtmlElement("script");
+ private static final ElKey STYLE = ElKey.forHtmlElement("style");
+ private static final AttribKey BODY_ONLOAD
+ = AttribKey.forHtmlAttrib(BODY, "onload");
+
+ private void rewriteScriptEl(Node root, EmbeddedContent c, Jobs jobs) {
+ Element scriptEl = (Element) c.getSource();
+ Node parent = scriptEl.getParentNode();
PluginEnvironment env = jobs.getPluginMeta().getPluginEnvironment();
-
- Attr type = scriptTag.getAttributeNodeNS(HTML_NS, "type");
- Attr src = scriptTag.getAttributeNodeNS(HTML_NS, "src");
- if (type != null && !isJavaScriptContentType(type.getNodeValue())) {
- jobs.getMessageQueue().addMessage(
- PluginMessageType.UNRECOGNIZED_CONTENT_TYPE,
- Nodes.getFilePositionFor(type),
- MessagePart.Factory.valueOf(type.getNodeValue()),
- MessagePart.Factory.valueOf(scriptTag.getTagName()));
- parent.removeChild(scriptTag);
- return;
- }
- // The script contents.
- CharProducer jsStream;
- FilePosition scriptPos;
- if (src == null) { // Parse the script tag body.
- jsStream = textNodesToCharProducer(Nodes.childrenOf(scriptTag), true);
- if (jsStream == null) {
- parent.removeChild(scriptTag);
- return;
- }
- scriptPos = FilePosition.span(
- Nodes.getFilePositionFor(scriptTag.getFirstChild()),
- Nodes.getFilePositionFor(scriptTag.getLastChild()));
- } else { // Load the src attribute
- FilePosition srcPos = Nodes.getFilePositionFor(src);
- FilePosition srcValuePos = Nodes.getFilePositionForValue(src);
- URI srcUri;
- try {
- srcUri = new URI(src.getNodeValue());
- } catch (URISyntaxException ex) {
- jobs.getMessageQueue().getMessages().add(
- new Message(PluginMessageType.MALFORMED_URL, MessageLevel.ERROR,
- srcPos, MessagePart.Factory.valueOf(src.getNodeName()))
- );
- parent.removeChild(scriptTag);
- return;
- }
-
- // Fetch the script source.
- URI absUri = srcValuePos.source().getUri()
- .resolve(srcUri);
- jobs.getMessageContext().addInputSource(new InputSource(absUri));
- jsStream = env.loadExternalResource(
- new ExternalReference(absUri, srcValuePos), "text/javascript");
- if (jsStream == null) {
- jobs.getMessageQueue().addMessage(
- PluginMessageType.FAILED_TO_LOAD_EXTERNAL_URL,
- srcValuePos, MessagePart.Factory.valueOf("" + srcUri));
- // Throw an exception so any user installed error handler will fire.
- jsStream = CharProducer.Factory.fromString(
- "throw new Error("
- + StringLiteral.toQuotedValue("Failed to load " + srcUri.toString())
- + ");",
- srcPos);
- }
- scriptPos = null;
- }

// Parse the body and create a block that will be placed inline in
// loadModule.
Block parsedScriptBody;
try {
- parsedScriptBody = parseJs(jsStream.getCurrentPosition().source(),
- jsStream, scriptPos, jobs.getMessageQueue());
+ parsedScriptBody = (Block) c.parse(env, jobs.getMessageQueue());
} catch (ParseException ex) {
ex.toMessageQueue(jobs.getMessageQueue());
- parsedScriptBody = null;
- }
-
- if (parsedScriptBody == null) {
- parent.removeChild(scriptTag);
+ parent.removeChild(scriptEl);
return;
}

- // Build a replacement element, <span/>, and link it to the extracted
- // javascript, so that when the DOM is rendered, we can properly interleave
- // the extract scripts with the scripts that generate markup.
- Element placeholder = parent.getOwnerDocument().createElementNS(
- HTML_NS, "span");
- Nodes.setFilePositionFor(placeholder, Nodes.getFilePositionFor(scriptTag));
- ExtractedHtmlContent.setExtractedScriptFor(
- placeholder, parsedScriptBody);
-
- // Replace the script tag with a placeholder that points to the inlined
- // script.
- if (Strings.equalsIgnoreCase(
- "defer", scriptTag.getAttributeNS(HTML_NS, "defer"))) {
- parent.removeChild(scriptTag);
- root.appendChild(placeholder);
+ if (parsedScriptBody == null || parsedScriptBody.children().isEmpty()) {
+ parent.removeChild(scriptEl);
} else {
- parent.replaceChild(placeholder, scriptTag);
+ Element placeholder = placeholderFor(scriptEl, parsedScriptBody);
+ // Replace the script tag with a placeholder that points to the inlined
+ // script.
+ if (c.isDeferred()) {
+ parent.removeChild(scriptEl);
+ root.appendChild(placeholder);
+ } else {
+ parent.replaceChild(placeholder, scriptEl);
+ }
}
}

- private void rewriteStyleTag(Element styleTag, Jobs jobs) {
- styleTag.getParentNode().removeChild(styleTag);
-
- CharProducer cssStream = textNodesToCharProducer(
- Nodes.childrenOf(styleTag), false);
- if (cssStream != null) {
- extractStyles(styleTag, cssStream, null, jobs);
- }
+ // Build a replacement element, <span/>, and link it to the extracted
+ // javascript, so that when the DOM is rendered, we can properly interleave
+ // the extract scripts with the scripts that generate markup.
+ private Element placeholderFor(Node n, Block parsedScriptBody) {
+ Element placeholder = n.getOwnerDocument().createElementNS(
+ Namespaces.HTML_NAMESPACE_URI, "span");
+ Nodes.setFilePositionFor(placeholder, Nodes.getFilePositionFor(n));
+ ExtractedHtmlContent.setExtractedScriptFor(placeholder, parsedScriptBody);
+ return placeholder;
+ }
+
+ private void rewriteStyleEl(EmbeddedContent c, Jobs jobs) {
+ Element styleEl = (Element) c.getSource();
+ styleEl.getParentNode().removeChild(styleEl);
+ extractStyles(styleEl, c, null, jobs);
}

- private void rewriteLinkTag(Element styleTag, Jobs jobs) {
- PluginEnvironment env = jobs.getPluginMeta().getPluginEnvironment();
-
- styleTag.getParentNode().removeChild(styleTag);
-
- Attr rel = styleTag.getAttributeNodeNS(HTML_NS, "rel");
- if (rel == null || !Strings.equalsIgnoreCase(
- rel.getNodeValue().trim(), "stylesheet")) {
- // If it's not a stylesheet then ignore it.
- // The HtmlValidator should complain but that's not our problem.
- return;
- }
-
- Attr href = styleTag.getAttributeNodeNS(HTML_NS, "href");
- Attr media = styleTag.getAttributeNodeNS(HTML_NS, "media");
-
- if (href == null) {
- jobs.getMessageQueue().addMessage(
- PluginMessageType.MISSING_ATTRIBUTE,
- Nodes.getFilePositionFor(styleTag),
- MessagePart.Factory.valueOf("href"),
- MessagePart.Factory.valueOf("link"));
- return;
- }
-
- URI hrefUri;
- try {
- hrefUri = new URI(href.getNodeValue());
- } catch (URISyntaxException ex) {
- jobs.getMessageQueue().getMessages().add(
- new Message(PluginMessageType.MALFORMED_URL, MessageLevel.ERROR,
- Nodes.getFilePositionFor(href),
- MessagePart.Factory.valueOf(href.getNodeName())));
- return;
- }
-
- // Fetch the stylesheet source.
- URI absUri = Nodes.getFilePositionForValue(href).source().getUri()
- .resolve(hrefUri);
- jobs.getMessageContext().addInputSource(new InputSource(absUri));
- CharProducer cssStream = env.loadExternalResource(
- new ExternalReference(
- absUri, Nodes.getFilePositionForValue(href)),
- "text/css");
- if (cssStream == null) {
- jobs.getMessageQueue().addMessage(
- PluginMessageType.FAILED_TO_LOAD_EXTERNAL_URL,
- Nodes.getFilePositionForValue(href),
- MessagePart.Factory.valueOf("" + hrefUri));
- return;
- }
-
- extractStyles(styleTag, cssStream, media, jobs);
+ private void rewriteLinkEl(EmbeddedContent c, Jobs jobs) {
+ Element linkEl = (Element) c.getSource();
+ linkEl.getParentNode().removeChild(linkEl);
+ Attr media = linkEl.getAttributeNodeNS(
+ Namespaces.HTML_NAMESPACE_URI, "media");
+ extractStyles(linkEl, c, media, jobs);
}

private void extractStyles(
- Element styleTag, CharProducer cssStream, Attr media, Jobs jobs) {
- Attr type = styleTag.getAttributeNodeNS(HTML_NS, "type");
-
- if (type != null && !isCssContentType(type.getNodeValue())) {
- jobs.getMessageQueue().addMessage(
- PluginMessageType.UNRECOGNIZED_CONTENT_TYPE,
- Nodes.getFilePositionFor(type),
- MessagePart.Factory.valueOf(type.getNodeValue()),
- MessagePart.Factory.valueOf(styleTag.getTagName()));
- return;
- }
-
- CssTree.StyleSheet stylesheet;
+ Element el, EmbeddedContent c, Attr media, Jobs jobs) {
+ MessageQueue mq = jobs.getMessageQueue();
+ PluginEnvironment env = jobs.getPluginMeta().getPluginEnvironment();
+ CssTree.StyleSheet stylesheet = null;
try {
- stylesheet = parseCss(cssStream, jobs.getMessageQueue());
- if (stylesheet == null) { return; } // If all tokens ignorable.
+ stylesheet = (CssTree.StyleSheet) c.parse(env, mq);
} catch (ParseException ex) {
- ex.toMessageQueue(jobs.getMessageQueue());
- return;
- }
+ ex.toMessageQueue(mq);
+ }
+ if (stylesheet == null || stylesheet.children().isEmpty()) { return; }

Set<Name> mediaTypes = Collections.<Name>emptySet();
if (media != null) {
String[] mediaTypeArr = media.getNodeValue().trim().split("\\s*,\\s*");
if (mediaTypeArr.length != 1 || !"".equals(mediaTypeArr[0])) {
- mediaTypes = new LinkedHashSet<Name>();
+ mediaTypes = Sets.newLinkedHashSet();
for (String mediaType : mediaTypeArr) {
if (!CssSchema.isMediaType(mediaType)) {
jobs.getMessageQueue().addMessage(
@@ -324,7 +205,7 @@
}
}
if (!(mediaTypes.isEmpty() || mediaTypes.contains(Name.css("all")))) {
- final List<CssTree.RuleSet> rules = new ArrayList<CssTree.RuleSet>();
+ final List<CssTree.RuleSet> rules = Lists.newArrayList();
stylesheet.acceptPreOrder(
new Visitor() {
public boolean visit(AncestorChain<?> ancestors) {
@@ -341,14 +222,14 @@
}
}, null);
if (!rules.isEmpty()) {
- List<CssTree> mediaChildren = new ArrayList<CssTree>();
+ List<CssTree> mediaChildren = Lists.newArrayList();
for (Name mediaType : mediaTypes) {
mediaChildren.add(
new CssTree.Medium(Nodes.getFilePositionFor(media), mediaType));
}
mediaChildren.addAll(rules);
CssTree.Media mediaBlock = new CssTree.Media(
- Nodes.getFilePositionFor(styleTag), mediaChildren);
+ Nodes.getFilePositionFor(el), mediaChildren);
stylesheet.appendChild(mediaBlock);
}
}
@@ -367,125 +248,23 @@
* &lt;/body&gt;
* </pre>
*/
- private void moveOnLoadHandlerToEndOfBody(Element body) {
- Attr onload = body.getAttributeNodeNS(HTML_NS, "onload");
- Attr language = body.getAttributeNodeNS(HTML_NS, "language");
- if (onload == null
- // If the onload handler is vbscript, let the validator complain.
- || (language != null && !isJavaScriptLanguage(language.getNodeValue()))
- ) {
- return;
- }
+ private void moveOnLoadHandlerToEndOfBody(EmbeddedContent c, Jobs jobs) {
+ Attr onload = (Attr) c.getSource();
+ Element body = onload.getOwnerElement();
body.removeAttributeNode(onload);

- FilePosition pos = Nodes.getFilePositionForValue(onload);
- String source = onload.getNodeValue();
- Text sourceText = body.getOwnerDocument().createTextNode(source);
- Nodes.setFilePositionFor(sourceText, pos);
- Element scriptElement = body.getOwnerDocument().createElementNS(
- HTML_NS, "script");
- scriptElement.appendChild(sourceText);
- Nodes.setFilePositionFor(scriptElement, pos);
-
- body.appendChild(scriptElement);
- }
-
- /**
- * A CharProducer that produces characters from the concatenation of all
- * the text nodes in the given node list.
- */
- private static CharProducer textNodesToCharProducer(
- Iterable<? extends Node> nodes, boolean stripComments) {
- List<Text> textNodes = new ArrayList<Text>();
- for (Node node : nodes) {
- if (node instanceof Text) { textNodes.add((Text) node); }
- }
- if (textNodes.isEmpty()) { return null; }
- List<CharProducer> content = new ArrayList<CharProducer>();
- for (int i = 0, n = textNodes.size(); i < n; ++i) {
- Text node = textNodes.get(i);
- String text = node.getNodeValue();
- if (stripComments) {
- if (i == 0) {
- text = text.replaceFirst("^(\\s*)<!--", "$1 ");
- }
- if (i + 1 == n) {
- text = text.replaceFirst("-->(\\s*)$", " $1");
- }
- }
- content.add(CharProducer.Factory.create(
- new StringReader(text),
- FilePosition.startOf(Nodes.getFilePositionFor(node))));
- }
- if (content.size() == 1) {
- return content.get(0);
- } else {
- return CharProducer.Factory.chain(content.toArray(new CharProducer[0]));
- }
- }
-
- /** "text/html;charset=UTF-8" -> "text/html" */
- private static String getMimeType(String contentType) {
- int typeEnd = contentType.indexOf(';');
- if (typeEnd < 0) { typeEnd = contentType.length(); }
- return Strings.toLowerCase(contentType.substring(0, typeEnd));
- }
-
- private static boolean isJavaScriptContentType(String contentType) {
- String mimeType = getMimeType(contentType);
- return ("text/javascript".equals(mimeType)
- || "application/x-javascript".equals(mimeType)
- || "type/ecmascript".equals(mimeType));
- }
-
- /**
- * Per the language attribute which identifies the programming language for
- * event handlers.
- * <p>
- * <a href=
- * "http://www.blooberry.com/indexdot/html/tagpages/attributes/language.htm">
- * Language</a>:
- *
- * This attribute is used to specify the current scripting
- * language in use for an element. 'JScript' and 'javascript' both
- * refer to Javascript engines. 'Vbs' and 'Vbscript' both refer to
- * Vbscript engines. 'XML' refers to an embedded XML
- * document/fragment.
- * <p>
- * Values: JScript [DEFAULT] | javascript | vbs | vbscript | XML
- */
- private static boolean isJavaScriptLanguage(String language) {
- language = Strings.toLowerCase(language);
- return language.startsWith("javascript") || language.startsWith("jscript");
- }
-
- private static boolean isCssContentType(String contentType) {
- return "text/css".equals(getMimeType(contentType));
- }
-
- public static Block parseJs(
- InputSource is, CharProducer cp, FilePosition scriptPos,
- MessageQueue localMessageQueue)
- throws ParseException {
- JsLexer lexer = new JsLexer(cp);
- JsTokenQueue tq = new JsTokenQueue(lexer, is);
- tq.setInputRange(scriptPos);
- if (tq.isEmpty()) { return null; }
- Parser p = new Parser(tq, localMessageQueue);
- Block body = p.parse();
- tq.expectEmpty();
- return body;
- }
-
- public static CssTree.StyleSheet parseCss(CharProducer cp, MessageQueue mq)
- throws ParseException {
- CssTree.StyleSheet input;
- TokenQueue<CssTokenType> tq = CssParser.makeTokenQueue(cp, mq, false);
- if (tq.isEmpty()) { return null; }
-
- CssParser p = new CssParser(tq, mq, MessageLevel.WARNING);
- input = p.parseStyleSheet();
- tq.expectEmpty();
- return input;
+ PluginEnvironment env = jobs.getPluginMeta().getPluginEnvironment();
+ MessageQueue mq = jobs.getMessageQueue();
+ Block handler;
+ try {
+ handler = (Block) c.parse(env, mq);
+ } catch (ParseException ex) {
+ ex.toMessageQueue(mq);
+ return;
+ }
+ if (handler != null && !handler.children().isEmpty()) {
+ Element placeholder = placeholderFor(onload, handler);
+ body.appendChild(placeholder);
+ }
}
}
=======================================
--- /trunk/src/com/google/caja/plugin/templates/HtmlAttributeRewriter.java Wed Dec 16 14:43:16 2009
+++ /trunk/src/com/google/caja/plugin/templates/HtmlAttributeRewriter.java Wed Dec 23 23:20:46 2009
@@ -18,20 +18,14 @@
import com.google.caja.lang.css.CssSchema;
import com.google.caja.lang.html.HTML;
import com.google.caja.lang.html.HtmlSchema;
-import com.google.caja.lexer.CharProducer;
-import com.google.caja.lexer.CssTokenType;
import com.google.caja.lexer.ExternalReference;
import com.google.caja.lexer.FilePosition;
-import com.google.caja.lexer.JsLexer;
-import com.google.caja.lexer.JsTokenQueue;
import com.google.caja.lexer.Keyword;
import com.google.caja.lexer.ParseException;
-import com.google.caja.lexer.TokenQueue;
import com.google.caja.parser.AncestorChain;
import com.google.caja.parser.ParseTreeNode;
import com.google.caja.parser.ParseTreeNodeContainer;
import com.google.caja.parser.Visitor;
-import com.google.caja.parser.css.CssParser;
import com.google.caja.parser.css.CssTree;
import com.google.caja.parser.html.Nodes;
import com.google.caja.parser.js.AbstractExpression;
@@ -40,7 +34,7 @@
import com.google.caja.parser.js.Expression;
import com.google.caja.parser.js.FunctionConstructor;
import com.google.caja.parser.js.Identifier;
-import com.google.caja.parser.js.Parser;
+import com.google.caja.parser.js.Operation;
import com.google.caja.parser.js.Reference;
import com.google.caja.parser.js.Statement;
import com.google.caja.parser.js.StringLiteral;
@@ -50,6 +44,7 @@
import com.google.caja.plugin.CssRewriter;
import com.google.caja.plugin.CssValidator;
import com.google.caja.plugin.PluginMeta;
+import com.google.caja.plugin.stages.EmbeddedContent;
import com.google.caja.reporting.MessageLevel;
import com.google.caja.reporting.MessagePart;
import com.google.caja.reporting.MessageQueue;
@@ -77,6 +72,7 @@
private final CssSchema cssSchema;
private final HtmlSchema htmlSchema;
private final MessageQueue mq;
+ private final Map<Attr, EmbeddedContent> attributeContent;
/** Maps handler attribute source to handler names. */
private final Map<String, String> handlerCache
= new HashMap<String, String>();
@@ -85,10 +81,11 @@

public HtmlAttributeRewriter(
PluginMeta meta, CssSchema cssSchema, HtmlSchema htmlSchema,
- MessageQueue mq) {
+ Map<Attr, EmbeddedContent> attributeContent, MessageQueue mq) {
this.meta = meta;
this.cssSchema = cssSchema;
this.htmlSchema = htmlSchema;
+ this.attributeContent = attributeContent;
this.mq = mq;
}

@@ -100,20 +97,22 @@
}

public static abstract class AttrValue {
+ final Attr src;
final FilePosition valuePos;
final HTML.Attribute attrInfo;
abstract Expression getValueExpr();
abstract String getPlainValue();
abstract String getRawValue();

- AttrValue(FilePosition valuePos, HTML.Attribute attr) {
+ AttrValue(Attr src, FilePosition valuePos, HTML.Attribute attr) {
+ this.src = src;
this.valuePos = valuePos;
this.attrInfo = attr;
}
}

public static AttrValue fromAttr(final Attr a, HTML.Attribute attr) {
- return new AttrValue(Nodes.getFilePositionForValue(a), attr) {
+ return new AttrValue(a, Nodes.getFilePositionForValue(a), attr) {
@Override
Expression getValueExpr() {
return StringLiteral.valueOf(valuePos, getPlainValue());
@@ -183,14 +182,8 @@
case SCRIPT:
String handlerFnName = handlerCache.get(value);
if (handlerFnName == null) {
- Block b;
- try {
- b = parseJsFromAttrValue(attr);
- } catch (ParseException ex) {
- ex.toMessageQueue(mq);
- return noResult(attr);
- }
- if (b.children().isEmpty()) { return noResult(attr); }
+ Block b = jsFromAttrib(attr);
+ if (b == null || b.children().isEmpty()) { return noResult(attr); }
rewriteEventHandlerReferences(b);

handlerFnName = meta.generateUniqueName("c");
@@ -220,12 +213,8 @@
dynamicValue = eventAdapter;
break;
case STYLE:
- CssTree.DeclarationGroup decls;
- try {
- decls = parseStyleAttrib(attr);
- if (decls == null) { return noResult(attr); }
- } catch (ParseException ex) {
- ex.toMessageQueue(mq);
+ CssTree.DeclarationGroup decls = styleFromAttrib(attr);
+ if (decls == null || decls.children().isEmpty()) {
return noResult(attr);
}

@@ -248,24 +237,52 @@
dynamicValue = StringLiteral.valueOf(pos, css);
break;
case URI:
- try {
- URI uri = new URI(value);
- ExternalReference ref = new ExternalReference(uri, pos);
- String rewrittenUri = meta.getPluginEnvironment()
- .rewriteUri(ref, attr.attrInfo.getMimeTypes());
- if (rewrittenUri == null) {
+ if (attributeContent.containsKey(attr.src)) { // A javascript: URI
+ Block b = this.jsFromAttrib(attr);
+ if (b == null || b.children().isEmpty()) { return noResult(attr); }
+ rewriteEventHandlerReferences(b);
+
+ handlerFnName = meta.generateUniqueName("c");
+ Declaration handler = (Declaration) QuasiBuilder.substV(
+ ""
+ + "var @handlerName = ___./*@synthetic*/markFuncFreeze("
+ + " /*@synthetic*/function ("
+ + ReservedNames.THIS_NODE + ") { @body*; });",
+ "handlerName", SyntheticNodes.s(
+ new Identifier(FilePosition.UNKNOWN, handlerFnName)),
+ "body", new ParseTreeNodeContainer(b.children()));
+ handlers.add(handler);
+ handlerCache.put(value, handlerFnName);
+
+ Operation urlAdapter = (Operation) QuasiBuilder.substV(
+ ""
+ + "'javascript:' + /*@synthetic*/encodeURIComponent("
+ + " 'plugin_dispatchEvent___(this, null, '"
+ + " + ___./*@synthetic*/getId(IMPORTS___)"
+ + " + ', ' + '@handlerName' + '), void 0')",
+ "handlerName", new Identifier(pos, handlerFnName));
+ urlAdapter.setFilePosition(pos);
+ dynamicValue = urlAdapter;
+ } else {
+ try {
+ URI uri = new URI(value);
+ ExternalReference ref = new ExternalReference(uri, pos);
+ String rewrittenUri = meta.getPluginEnvironment()
+ .rewriteUri(ref, attr.attrInfo.getMimeTypes());
+ if (rewrittenUri == null) {
+ mq.addMessage(
+ IhtmlMessageType.MALFORMED_URI, pos,
+ MessagePart.Factory.valueOf(uri.toString()));
+ return noResult(attr);
+ }
+ dynamicValue = StringLiteral.valueOf(
+ ref.getReferencePosition(), rewrittenUri);
+ } catch (URISyntaxException ex) {
mq.addMessage(
IhtmlMessageType.MALFORMED_URI, pos,
- MessagePart.Factory.valueOf(uri.toString()));
+ MessagePart.Factory.valueOf(value));
return noResult(attr);
}
- dynamicValue = StringLiteral.valueOf(
- ref.getReferencePosition(), rewrittenUri);
- } catch (URISyntaxException ex) {
- mq.addMessage(
- IhtmlMessageType.MALFORMED_URI, pos,
- MessagePart.Factory.valueOf(value));
- return noResult(attr);
}
break;
case URI_FRAGMENT:
@@ -400,76 +417,6 @@
}
}, null);
}
-
- /**
- * Parses an {@code onclick} handler's or other handler's attribute value
- * as a javascript statement.
- */
- private Block parseJsFromAttrValue(AttrValue attr) throws ParseException {
- FilePosition pos = attr.valuePos;
- CharProducer cp = fromAttrValue(attr);
- JsTokenQueue tq = new JsTokenQueue(new JsLexer(cp, false), pos.source());
- tq.setInputRange(pos);
- if (tq.isEmpty()) {
- return new Block(pos, Collections.<Statement>emptyList());
- }
- // Parse as a javascript block.
- Block b = new Parser(tq, mq).parse();
- // Block will be sanitized in a later pass.
- b.setFilePosition(pos);
- return b;
- }
-
- /**
- * Parses a style attribute's value as a CSS declaration group.
- */
- private CssTree.DeclarationGroup parseStyleAttrib(AttrValue attr)
- throws ParseException {
- return parseCssDeclarationGroup(fromAttrValue(attr), attr.valuePos);
- }
-
- CssTree.DeclarationGroup parseCssDeclarationGroup(
- CharProducer cp, FilePosition inputRange)
- throws ParseException {
- // Parse the CSS as a set of declarations separated by semicolons.
- TokenQueue<CssTokenType> tq = CssParser.makeTokenQueue(cp, mq, false);
- if (tq.isEmpty()) { return null; }
- if (inputRange != null) { tq.setInputRange(inputRange); }
- CssParser p = new CssParser(tq, mq, MessageLevel.WARNING);
- CssTree.DeclarationGroup decls = p.parseDeclarationGroup();
- tq.expectEmpty();
- return decls;
- }
-
- private static CharProducer fromAttrValue(AttrValue a) {
- String value = a.getPlainValue();
- FilePosition pos = a.valuePos;
- String rawValue = a.getRawValue();
- // Use the raw value so that the file positions come out right in
- // error messages.
- if (rawValue != null) {
- // The raw value is HTML so we wrap it in an HTML decoder.
- CharProducer cp = CharProducer.Factory.fromHtmlAttribute(
- CharProducer.Factory.fromString(deQuote(rawValue), pos));
- // Check if the attribute value has been set since parsing.
- if (String.valueOf(cp.getBuffer(), cp.getOffset(), cp.getLength())
- .equals(value)) {
- return cp;
- }
- }
- // Reached if no raw value stored or if the raw value is out of sync.
- return CharProducer.Factory.fromString(value, pos);
- }
-
- /** Strip quotes from an attribute value if there are any. */
- private static String deQuote(String s) {
- int len = s.length();
- if (len < 2) { return s; }
- char ch0 = s.charAt(0);
- return (('"' == ch0 || '\'' == ch0) && ch0 == s.charAt(len - 1))
- ? " " + s.substring(1, len - 1) + " "
- : s;
- }

static SanitizedAttr noResult(AttrValue a) {
String safeValue = a.attrInfo.getSafeValue();
@@ -492,4 +439,30 @@
? Collections.<String>emptyList()
: Arrays.asList(idents.trim().split("\\s+"));
}
-}
+
+ private Block jsFromAttrib(AttrValue v) {
+ EmbeddedContent c = attributeContent.get(v.src);
+ if (c == null) { return null; }
+ try {
+ ParseTreeNode n = c.parse(meta.getPluginEnvironment(), mq);
+ if (n instanceof Block) { return (Block) n; }
+ } catch (ParseException ex) {
+ ex.toMessageQueue(mq);
+ }
+ return null;
+ }
+
+ private CssTree.DeclarationGroup styleFromAttrib(AttrValue v) {
+ EmbeddedContent c = attributeContent.get(v.src);
+ if (c == null) { return null; }
+ try {
+ ParseTreeNode n = c.parse(meta.getPluginEnvironment(), mq);
+ if (n instanceof CssTree.DeclarationGroup) {
+ return (CssTree.DeclarationGroup) n;
+ }
+ } catch (ParseException ex) {
+ ex.toMessageQueue(mq);
+ }
+ return null;
+ }
+}
=======================================
--- /trunk/src/com/google/caja/plugin/templates/SafeHtmlMaker.java Fri Nov 13 11:43:08 2009
+++ /trunk/src/com/google/caja/plugin/templates/SafeHtmlMaker.java Wed Dec 23 23:20:46 2009
@@ -33,9 +33,9 @@
import com.google.caja.plugin.ExtractedHtmlContent;
import com.google.caja.plugin.PluginMeta;
import com.google.caja.reporting.MessageContext;
+import com.google.caja.util.Lists;
import com.google.caja.util.Pair;

-import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
@@ -99,7 +99,7 @@
private final PluginMeta meta;
private final MessageContext mc;
private final Document doc;
- private final List<Block> js = new ArrayList<Block>();
+ private final List<Block> js = Lists.newArrayList();
private final Map<Node, ParseTreeNode> scriptsPerNode;
private final List<Node> roots;
private final List<Statement> handlers;
@@ -141,8 +141,8 @@
// First we build a skeleton which maps a safe DOM to a list of "bones"
// which include element start tags, text nodes, and embedded scripts in
// depth-first order.
- List<DomBone> domSkeleton = new ArrayList<DomBone>();
- List<Node> safe = new ArrayList<Node>(roots.size());
+ List<DomBone> domSkeleton = Lists.newArrayList();
+ List<Node> safe = Lists.newArrayList(roots.size());
for (Node root : roots) {
Node one = makeSkeleton(root, domSkeleton);
if (one != null) { safe.add(one); }
@@ -151,7 +151,7 @@
fleshOutSkeleton(domSkeleton);

Node safeHtml = consolidateHtml(safe);
- return Pair.pair(safeHtml, (List<Block>) new ArrayList<Block>(js));
+ return Pair.pair(safeHtml, Lists.newArrayList(js));
}

/** Part of a DOM skeleton. */
=======================================
--- /trunk/src/com/google/caja/plugin/templates/TemplateCompiler.java Fri Nov 13 11:43:08 2009
+++ /trunk/src/com/google/caja/plugin/templates/TemplateCompiler.java Wed Dec 23 23:20:46 2009
@@ -26,13 +26,15 @@
import com.google.caja.parser.js.UncajoledModule;
import com.google.caja.plugin.ExtractedHtmlContent;
import com.google.caja.plugin.PluginMeta;
+import com.google.caja.plugin.stages.EmbeddedContent;
+import com.google.caja.plugin.stages.HtmlEmbeddedContentFinder;
import com.google.caja.reporting.MessageContext;
import com.google.caja.reporting.MessageLevel;
import com.google.caja.reporting.MessageQueue;
+import com.google.caja.util.Lists;
+import com.google.caja.util.Maps;
import com.google.caja.util.Pair;

-import java.util.ArrayList;
-import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;

@@ -84,7 +86,10 @@
* </ul>
*/
private final Map<Node, ParseTreeNode> scriptsPerNode
- = new IdentityHashMap<Node, ParseTreeNode>();
+ = Maps.newIdentityHashMap();
+
+ private final Map<Attr, EmbeddedContent> embeddedContent
+ = Maps.newIdentityHashMap();

/**
* @param ihtmlRoots roots of trees to process.
@@ -100,13 +105,14 @@
List<? extends CssTree.StyleSheet> safeStylesheets,
CssSchema cssSchema, HtmlSchema htmlSchema,
PluginMeta meta, MessageContext mc, MessageQueue mq) {
- this.ihtmlRoots = new ArrayList<Node>(ihtmlRoots);
- this.safeStylesheets = new ArrayList<CssTree.StyleSheet>(safeStylesheets);
+ this.ihtmlRoots = Lists.newArrayList(ihtmlRoots);
+ this.safeStylesheets = Lists.newArrayList(safeStylesheets);
this.htmlSchema = htmlSchema;
this.meta = meta;
this.mc = mc;
this.mq = mq;
- this.aRewriter = new HtmlAttributeRewriter(meta, cssSchema, htmlSchema, mq);
+ this.aRewriter = new HtmlAttributeRewriter(
+ meta, cssSchema, htmlSchema, embeddedContent, mq);
}

/**
@@ -116,6 +122,12 @@
private void inspect() {
if (!mq.hasMessageAtLevel(MessageLevel.FATAL_ERROR)) {
for (Node ihtmlRoot : ihtmlRoots) {
+ HtmlEmbeddedContentFinder finder = new HtmlEmbeddedContentFinder(
+ htmlSchema, null, mq, mc);
+ for (EmbeddedContent c : finder.findEmbeddedContent(ihtmlRoot)) {
+ Node src = c.getSource();
+ if (src instanceof Attr) { embeddedContent.put((Attr) src, c); }
+ }
inspect(ihtmlRoot, ElKey.forHtmlElement("div"));
}
}
=======================================
--- /trunk/tests/com/google/caja/ancillary/servlet/ZipFileSystemTest.java Thu Dec 3 17:21:42 2009
+++ /trunk/tests/com/google/caja/ancillary/servlet/ZipFileSystemTest.java Wed Dec 23 23:20:46 2009
@@ -14,6 +14,8 @@

package com.google.caja.ancillary.servlet;

+import com.google.caja.util.ContentType;
+
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.OutputStream;
=======================================
--- /trunk/tests/com/google/caja/lexer/CharProducerTest.java Thu Jul 23 09:16:30 2009
+++ /trunk/tests/com/google/caja/lexer/CharProducerTest.java Wed Dec 23 23:20:46 2009
@@ -508,6 +508,42 @@
}
assertEquals(positions.length, actualPositions.size());
}
+
+ private static final String decodeUri(String uriPart) {
+ return CharProducer.Factory.fromUri(
+ CharProducer.Factory.fromString(uriPart, InputSource.UNKNOWN))
+ .toString();
+ }
+
+ public final void testFromUri() {
+ assertEquals("", decodeUri(""));
+ assertEquals("foo", decodeUri("foo"));
+ // Plus (+) character not decoded to space.
+ // javascript:alert('foo+bar') issues an alert containing a plus character.
+ assertEquals("foo+bar", decodeUri("foo+bar"));
+ assertEquals("foo@bar", decodeUri("foo%40bar"));
+ assertEquals("\u00A0", decodeUri("%A0")); // A single ASCII char
+ // Test some well-formed UTF-8 sequences.
+ assertEquals("foo\u0123bar", decodeUri("foo%C4%a3bar"));
+ assertEquals("foo\u20ACbar", decodeUri("foo%e2%82%Acbar"));
+ // There are multiple ways to encode supplementary characters
+ assertEquals(
+ String.valueOf(Character.toChars(0x1d11e)),
+ decodeUri("%ed%a0%B4%eD%b4%9E")); // as a surrogate pair
+ assertEquals(
+ String.valueOf(Character.toChars(0x1d11e)),
+ decodeUri("%F0%9d%84%9E")); // as a 4 byte sequence
+ assertEquals(
+ String.valueOf(Character.toChars(0x1d11e)),
+ decodeUri("%f0%9D%84%9e")); // as a 4 byte sequence with different case
+ // Test boundary conditions.
+ assertEquals("%", decodeUri("%"));
+ assertEquals("%2", decodeUri("%2")); // An incomplete sequence
+ assertEquals("%z", decodeUri("%z")); // A non-hex follower.
+ assertEquals("%", decodeUri("%25"));
+ assertEquals("%2", decodeUri("%252"));
+ assertEquals("%25", decodeUri("%2525")); // Don't over decode.
+ }

private static final Pattern ESCAPED =
Pattern.compile("[^\\p{javaLetterOrDigit} \\.\\-\\:\\;\\'\\\",/\\?&\\#]");
=======================================
--- /trunk/tests/com/google/caja/opensocial/example-rewritten.xml Fri Nov 13 13:37:15 2009
+++ /trunk/tests/com/google/caja/opensocial/example-rewritten.xml Wed Dec 23 23:20:46 2009
@@ -72,7 +72,7 @@
try {
{
throw $v.ts($v.construct($v.ro('Error'), [
- 'Failed to load example-gadget-files/no-such-file.js' ]));
+ 'Failed to load no-such-file.js' ]));
}
} catch (ex___) {
___.getNewModuleHandler().handleUncaughtException(ex___,
=======================================
--- /trunk/tests/com/google/caja/plugin/domita_test_untrusted.html Tue Dec 8 16:50:45 2009
+++ /trunk/tests/com/google/caja/plugin/domita_test_untrusted.html Wed Dec 23 23:20:46 2009
@@ -3201,14 +3201,17 @@
(''
+ '<map name="foo-xyz___">'
+ '<area href="http://example.com/'
- + '?mime-type=*%2F*&amp;uri=areatarget.html"'
+ + '?mime-type=*%2F*&amp;uri=(base)/areatarget.html"'
+ ' target="_blank">'
+ '</map>'
+ '<img'
- + ' src="http://example.com/?mime-type=image%2F*&amp;uri=mappic.gif"'
- + ' usemap="#foo-xyz___"'
+ + ' src="http://example.com/?mime-type=image%2F*'
+ + '&amp;uri=(base)/mappic.gif" usemap="#foo-xyz___"'
+ '>'),
- canonInnerHtml(directAccess.getInnerHTML(el)));
+ canonInnerHtml(directAccess.getInnerHTML(el)).replace(
+ // Normalize the base URI
+ new RegExp('\\buri=[^\"<>]*?(%2[Ff]|/)plugin(%2[Ff]|/)', 'g'),
+ 'uri=(base)/'));

pass('test-usemap');
});
=======================================
--- /trunk/tests/com/google/caja/plugin/stages/RewriteHtmlStageTest.java Fri Nov 13 11:43:08 2009
+++ /trunk/tests/com/google/caja/plugin/stages/RewriteHtmlStageTest.java Wed Dec 23 23:20:46 2009
@@ -14,6 +14,7 @@

package com.google.caja.plugin.stages;

+import com.google.caja.lang.html.HtmlSchema;
import com.google.caja.lexer.FilePosition;
import com.google.caja.parser.AncestorChain;
import com.google.caja.parser.html.Namespaces;
@@ -187,8 +188,7 @@
+ "<span jobnum=\"3\"></span>",
Job.JobType.HTML),
job("{\n onerror = panic;\n}", Job.JobType.JAVASCRIPT),
- job("{\n throw new Error("
- + "'Failed to load http://bogus.com/bogus.js#\\'!');\n}",
+ job("{\n throw new Error('Failed to load bogus.js#%27%21');\n}",
Job.JobType.JAVASCRIPT),
job("{ foo(); }", Job.JobType.JAVASCRIPT));
assertNoErrors();
@@ -197,7 +197,9 @@
@Override
protected boolean runPipeline(Jobs jobs) throws Exception {
mq.getMessages().clear();
- boolean result = new RewriteHtmlStage().apply(jobs);
+ HtmlSchema schema = HtmlSchema.getDefault(mq);
+ boolean result = new ResolveUriStage(schema).apply(jobs)
+ && new RewriteHtmlStage(schema).apply(jobs);
// Dump the extracted script bits on the queue.
for (Job job : new ArrayList<Job>(jobs.getJobsByType(JobType.HTML))) {
Dom dom = job.getRoot().cast(Dom.class).node;
=======================================
--- /trunk/tests/com/google/caja/plugin/templates/TemplateCompilerTest.java Thu Dec 10 17:39:38 2009
+++ /trunk/tests/com/google/caja/plugin/templates/TemplateCompilerTest.java Wed Dec 23 23:20:46 2009
@@ -223,6 +223,30 @@
+ " emitter___.signalLoaded();"
+ "}")));
}
+
+ public final void testJavascriptUrl() throws Exception {
+ assertSafeHtml(
+ htmlFragment(fromString(
+ "<a href='javascript:alert(1+1)'>Two!!</a>")),
+ htmlFragment(fromString(
+ "<a id=\"id_2___\" target=\"_blank\">Two!!</a>")),
+ js(fromString(
+ ""
+ + "{"
+ // The extracted handler.
+ + " var c_1___ = ___.markFuncFreeze(function(thisNode___) {"
+ + " alert(1 + 1);" // Cajoled later
+ + " });"
+ + " var el___; var emitter___ = IMPORTS___.htmlEmitter___;"
+ + " el___ = emitter___.byId('id_2___');"
+ + " emitter___.setAttr(el___, 'href', 'javascript:' +"
+ + " encodeURIComponent('plugin_dispatchEvent___(this, null, ' +"
+ + " ___.getId(IMPORTS___) + ', ' + 'c_1___' + '), void 0'));"
+ + " el___.removeAttribute('id');"
+ + " el___ = emitter___.finish();"
+ + " emitter___.signalLoaded();"
+ + "}")));
+ }

// See bug 722
public final void testFormOnSubmitEmpty() throws Exception {

Reply all
Reply to author
Forward
0 new messages