r46730 - speeltuin/andre/mmget/src/main/java/org/mmbase/mmget

0 views
Skip to first unread message

andre

unread,
Oct 8, 2015, 2:29:17 PM (10/8/15)
to mmbase...@googlegroups.com
Author: andre
Date: 2015-10-08 20:29:16 +0200 (Thu, 08 Oct 2015)
New Revision: 46730

Modified:
speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/HTMLReader.java
speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/MMGet.java
speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceReWriter.java
speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceWriter.java
Log:
fixed a bug with rewriting links with images and sessionids

Modified: speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/HTMLReader.java
===================================================================
--- speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/HTMLReader.java 2015-08-06 22:17:00 UTC (rev 46729)
+++ speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/HTMLReader.java 2015-10-08 18:29:16 UTC (rev 46730)
@@ -107,7 +107,7 @@
if (lcTag.startsWith("<a ") || lcTag.startsWith("<link ") || lcTag.startsWith("<area ")) {
attr = "href=";
} else {
- attr = "src="; // TODO: src's of css in html?
+ attr = "src="; // @TODO src's of css in html?
}

p1 = lcTag.indexOf(attr);

Modified: speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/MMGet.java
===================================================================
--- speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/MMGet.java 2015-08-06 22:17:00 UTC (rev 46729)
+++ speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/MMGet.java 2015-10-08 18:29:16 UTC (rev 46730)
@@ -19,7 +19,7 @@

/**
* An MMBase application that accepts a url to export all files 'below' that url.
- * TODO: init rootURL early on, and check all urls against it (so we don't travel up the rootURL)
+ * @TODO init rootURL early on, and check all urls against it (so we don't travel up the rootURL)
*
* @author Andr&eacute; van Toly
* @version $Id$
@@ -225,6 +225,10 @@
startdirURL = null;

readUrl(startURL);
+
+ if (log.isDebugEnabled()) {
+ log.debug(savedURLs);
+ }
return "Finished! Saved " + savedURLs.size() + " links to files.";
}

@@ -306,6 +310,9 @@
ResourceWriter rw = null;
try {
rw = new ResourceWriter(linkURL);
+ if (rw == null) {
+ return;
+ }
filename = rw.getFilename();

if (rw.getContentType() < 1) {
@@ -313,8 +320,10 @@
rw.disconnect();

} else {
+
if (rw.getContentType() == CONTENTTYPE_HTML
&& !linkURL.toString().startsWith(startdirURL.toString())) {
+
log.info(linkURL.toString() + " -- UP TREE, not following");

if (!link.equals(linkURL.toString()) && !links2files.containsKey(link)) {
@@ -344,7 +353,7 @@

String relative = UriParser.makeRelative(calcdir, calclink.toString());
if (!"".equals(link) && !links2files.containsKey(link) && !link.equals(relative)) { // only when different
- //log.debug("link2files: " + link + " -> " + relative);
+ log.debug("link2files: " + link + " -> " + relative);
links2files.put(link, relative); /* /dir/css/bla.css + ../css/bla.css */
}


Modified: speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceReWriter.java
===================================================================
--- speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceReWriter.java 2015-08-06 22:17:00 UTC (rev 46729)
+++ speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceReWriter.java 2015-10-08 18:29:16 UTC (rev 46730)
@@ -53,9 +53,10 @@
private void rewrite() throws IOException {
if (log.isDebugEnabled()) log.debug("REwriting: " + url + " -> file: " + filename);
File f = getFile(filename);
- if (f.exists()) {
- //log.warn("File '" + f.toString() + "' already exists, deleting it and saving again.");
- f.delete();
+ if (f.exists() && f.delete()) {
+ if (log.isDebugEnabled()) {
+ log.debug("File '" + f.toString() + "' already existed, deleted it and saving again.");
+ }
}

Map<String,String> links2files = new HashMap<String,String>();
@@ -78,15 +79,16 @@
int pos1 = line.indexOf(sblink.toString());
if (pos1 > -1) {
int pos2 = line.indexOf("\"", pos1 + 1);
-
- String hitlink = line.substring(pos1 + 1, pos2);
- String testlink = hitlink;
- if (hitlink.contains(";")) testlink = MMGet.removeSessionid(hitlink);
- //log.debug("hitlink: '" + hitlink + "', testlink: '" + testlink + "'" + "', link: '" + link + "'");
+ String hit = line.substring(pos1 + 1, pos2); // between the two "
+
+ // check if this hit is the exact same as link
+ String testlink = MMGet.removeSessionid(hit);
if (!testlink.equals(link)) continue;
-
- line = line.replace(hitlink, file);
- //if (log.isDebugEnabled()) log.debug("replaced '" + link + "' with '" + file + "' in: " + filename);
+
+ line = line.replace(hit, file);
+ if (log.isDebugEnabled()) {
+ log.debug("replaced '" + link + "' with '" + file + "' in: " + filename + "' (hitlink: '" + hit + "')");
+ }
}
}
}
@@ -95,8 +97,10 @@
out.flush();
in.close();
out.close();
-
- if (log.isDebugEnabled()) log.debug("Saved: " + url + " -> file: " + f.toString() );
+
+ if (log.isDebugEnabled()) {
+ log.debug("Saved: " + url + " -> file: " + f.toString() );
+ }

}


Modified: speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceWriter.java
===================================================================
--- speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceWriter.java 2015-08-06 22:17:00 UTC (rev 46729)
+++ speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceWriter.java 2015-10-08 18:29:16 UTC (rev 46730)
@@ -72,7 +72,9 @@
f.delete();

} else {
- log.info("Not modified: " + f.toString() + ", f:" + f.lastModified() + " huc:" + huc.getLastModified());
+ if (log.isDebugEnabled()) {
+ log.debug("Not modified: " + f.toString() + ", f:" + f.lastModified() + " huc:" + huc.getLastModified());
+ }
// MMGet.savedURLs.put(url, filename);
MMGet.addSavedURL(this.url, filename);


Reply all
Reply to author
Forward
0 new messages