r46729 - speeltuin/andre/mmget/src/main/java/org/mmbase/mmget

0 views
Skip to first unread message

andre

unread,
Aug 6, 2015, 6:17:01 PM (8/6/15)
to mmbase...@googlegroups.com
Author: andre
Date: 2015-08-07 00:17:00 +0200 (Fri, 07 Aug 2015)
New Revision: 46729

Modified:
speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/CSSReader.java
speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/HTMLReader.java
speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/MMGet.java
speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceReWriter.java
speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceWriter.java
speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/UrlReaders.java
Log:
some more refactoring

Modified: speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/CSSReader.java
===================================================================
--- speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/CSSReader.java 2015-08-06 21:00:44 UTC (rev 46728)
+++ speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/CSSReader.java 2015-08-06 22:17:00 UTC (rev 46729)
@@ -19,7 +19,7 @@
* @author André van Toly
* @version $Id$
*/
-public final class CSSReader extends UrlReader {
+final class CSSReader extends UrlReader {
//private static final Logger log = Logging.getLoggerInstance(CSSReader.class);

private HttpURLConnection huc = null;
@@ -30,9 +30,9 @@
@import "form.css";
@import url("mystyle.css");
*/
- public static final String URL_PATTERN = "[\\w\\s?]url\\((.*)\\)[\\s;]";
- private static final Pattern urlPattern = Pattern.compile(URL_PATTERN);;
- public static final String IMPORT_PATTERN = "@import\\s+[\"\'](.*)[\"\']";
+ private static final String URL_PATTERN = "[\\w\\s?]url\\((.*)\\)[\\s;]";
+ private static final Pattern urlPattern = Pattern.compile(URL_PATTERN);
+ private static final String IMPORT_PATTERN = "@import\\s+[\"\'](.*)[\"\']";
private static final Pattern importPattern = Pattern.compile(IMPORT_PATTERN);

public CSSReader(HttpURLConnection huc) throws IOException {

Modified: speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/HTMLReader.java
===================================================================
--- speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/HTMLReader.java 2015-08-06 21:00:44 UTC (rev 46728)
+++ speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/HTMLReader.java 2015-08-06 22:17:00 UTC (rev 46729)
@@ -16,7 +16,7 @@
* @author André van Toly
* @version $Id$
*/
-public final class HTMLReader extends UrlReader {
+final class HTMLReader extends UrlReader {
private static final Logger log = Logging.getLoggerInstance(HTMLReader.class);

private HttpURLConnection huc = null;
@@ -25,14 +25,14 @@
/**
* Tags with src or href attributes to be looking for.
*/
- public final static String[] wantTags = {
+ private final static String[] WANT_TAGS = {
"<a ", "<A ",
"<applet", "<APPLET",
"<area", "<AREA",
"<audio", "<AUDIO",
"<embed", "<EMBED",
"<frame", "<FRAME",
- //"<input", "<INPUT", // TODO: <input type="image" src=".." />
+ "<input", "<INPUT", // only for <input type="image" src=... />
"<iframe", "<IFRAME",
"<img", "<IMG",
"<link", "<LINK",
@@ -50,14 +50,17 @@

/**
* Gets all links that look they can contain to resources
- * @return list contain links
+ * @return list with links
*/
public ArrayList<String> getLinks() throws IOException {
ArrayList<String> al = new ArrayList<String>();
String tag;
while ((tag = nextTag()) != null) {
- for (String wantTag : wantTags) {
+ for (String wantTag : WANT_TAGS) {
if (tag.startsWith(wantTag)) {
+ if (tag.startsWith("<input") && !tag.contains("image")) {
+ break;
+ }
String link = extractHREF(tag);
if (link != null) {
al.add(link);
@@ -81,7 +84,7 @@
* Reads a tags and its contents.
* @return the tag
*/
- protected String readTag() throws IOException {
+ String readTag() throws IOException {
StringBuilder theTag = new StringBuilder("<");
int i = '<';
while (i != '>' && (i = inrdr.read()) != -1) {
@@ -96,7 +99,7 @@
* @param tag the first parameter
* @return a link to a resource hopefully
*/
- public static String extractHREF(String tag) {
+ private static String extractHREF(String tag) {
String lcTag = tag.toLowerCase();
String attr;
int p1, p2, p3, p4;
@@ -104,7 +107,7 @@
if (lcTag.startsWith("<a ") || lcTag.startsWith("<link ") || lcTag.startsWith("<area ")) {
attr = "href=";
} else {
- attr = "src="; // TODO: src's of css in html
+ attr = "src="; // TODO: src's of css in html?
}

p1 = lcTag.indexOf(attr);
@@ -122,7 +125,8 @@

String href = tag.substring(p3 + 1, p4);
if (href.startsWith("mailto") || href.startsWith("#") || href.startsWith("javascript")) {
- //log.info(href + " -- NOT FOLLOWING (yet)"); // Can't be used (for now), TODO: todo's here?
+ //log.info(href + " -- NOT FOLLOWING (yet)"); // Can't be used (for now),
+ // TODO: todo's here?
return null;
}

@@ -132,8 +136,8 @@
/**
* Read the next tag
* @return a complete tag, like &lt;img scr="foo.gif" /&gt;
- */
- public String nextTag() throws IOException {
+ */
+ String nextTag() throws IOException {
int i;
while ((i = inrdr.read()) != -1) {
char c = (char)i;

Modified: speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/MMGet.java
===================================================================
--- speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/MMGet.java 2015-08-06 21:00:44 UTC (rev 46728)
+++ speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/MMGet.java 2015-08-06 22:17:00 UTC (rev 46729)
@@ -28,66 +28,63 @@

private static final Logger log = Logging.getLoggerInstance(MMGet.class);

- public static final String CONFIG_FILE = "mmget.xml";
+ private static final String CONFIG_FILE = "mmget.xml";
private static final UtilReader utilreader = new UtilReader(CONFIG_FILE, new Runnable() {
public void run() {
configure(utilreader.getProperties());
}
});
/* link to start exporting from and directory of the start url */
- public static String url;
+ private static String url;
public static String serverpart;
- protected static URL startURL;
- protected static URL startdirURL;
+ private static URL startURL;
+ private static URL startdirURL;

/**
* Location the files should be saved to, directory to save files should be in the webroot (for now)
*/
- public static String directory;
+ private static String directory;
/**
* Directory to save in
*/
- protected static File savedir;
+ static File savedir;

- public static Future<String> future = null;
- protected boolean done = false;
+ private static Future<String> future = null;

/* not wanted: offsite, already tried but 404 etc. */
- protected static Set<URL> ignoredURLs = new HashSet<URL>();
+ static final Set<URL> ignoredURLs = new HashSet<URL>();
/* urls to read (html, css) */
- protected static List<URL> readURLs = Collections.synchronizedList(new ArrayList<URL>());
+ private static final List<URL> readURLs = Collections.synchronizedList(new ArrayList<URL>());
/* saved: url -> filename */
- protected static Map<URL,String> savedURLs = Collections.synchronizedMap(new HashMap<URL,String>());
+ private static final Map<URL,String> savedURLs = Collections.synchronizedMap(new HashMap<URL,String>());
/**
* rewrite these: url -> link in page / new link in rewritten page
*/
- protected static Map<URL,Map<String,String>> url2links;
+ static final Map<URL,Map<String,String>> url2links;
static {
url2links = Collections.synchronizedMap(new HashMap<URL, Map<String, String>>());
}

- /* homepage to use when saving a file with no extension (thus presuming directory) */
- protected static String homepage = "index.html";
- protected static List<String> contentheadersHTML = Arrays.asList(
+ private static List<String> contentheadersHTML = Arrays.asList(
"text/html",
"application/xhtml+xml",
"application/xml",
"text/xml"
);
- protected static List<String> contentheadersCSS = Arrays.asList(
+ private static List<String> contentheadersCSS = Arrays.asList(
"text/css"
);

/* content-types */
protected static final int CONTENTTYPE_OTHER = 0;
- protected static final int CONTENTTYPE_HTML = 1;
- protected static final int CONTENTTYPE_CSS = 2;
+ static final int CONTENTTYPE_HTML = 1;
+ static final int CONTENTTYPE_CSS = 2;

/**
* Checks and sets links and export directory.
* Checks if the export directory exists, if not will try to create one in the MMBase data directory.
*/
- public static void init() throws IOException, URISyntaxException, MalformedURLException {
+ private static void init() throws IOException, URISyntaxException, MalformedURLException {
configure(utilreader.getProperties());
File datadir = MMBase.getMMBase().getDataDir();
ResourceLoader webroot = ResourceLoader.getWebRoot();
@@ -122,7 +119,7 @@
* Reads configuration
* @param config configuration properties
*/
- synchronized static void configure(Map<String, String> config) {
+ private synchronized static void configure(Map<String, String> config) {
//if (log.isDebugEnabled()) log.debug("Reading configuration..");
String tmp = config.get("directory");
if (tmp != null && !tmp.equals("") && directory != null && directory.equals("")) {
@@ -131,7 +128,7 @@
}
tmp = config.get("homepage");
if (tmp != null && !tmp.equals("")) {
- homepage = tmp;
+ String homepage = tmp;
log.info("Default homepage: " + homepage);
}
tmp = config.get("htmlheaders");
@@ -194,7 +191,7 @@
return start();
}
});
- ThreadPools.identify(future, "MMGet download of url '" + startURL.toString() + "'<br />in directory '" + savedir.toString() + "'");
+ ThreadPools.identify(future, "MMGet download of url '" + startURL.toString() + "'\nin directory '" + savedir.toString() + "'");
String fname = ThreadPools.getString(future);
log.debug("fname: " + fname);
int timeout = 10;
@@ -232,7 +229,7 @@
}

public void cancel() {
- done = true;
+ boolean done = true;
}

/**
@@ -376,7 +373,7 @@

}

- protected static int contentType(URLConnection uc) {
+ static int contentType(URLConnection uc) {
String contentheader = uc.getHeaderField("content-type");
//log.debug("header: " + contentheader);
int pk = contentheader.indexOf(";");
@@ -451,7 +448,9 @@
f = new File(savedir, dir);
if (!f.exists()) {
if (f.mkdirs()) {
- //log.debug("Directory created: " + savedir);
+ if (log.isDebugEnabled()) {
+ log.debug("Directory created: " + savedir);
+ }
} else {
log.warn("Directory '" + f + "' could not be created");
}
@@ -484,13 +483,13 @@
return pathList;
}
*/
- protected String getSavedFilename(URL url) {
+String getSavedFilename(URL url) {
synchronized(savedURLs) {
return savedURLs.get(url);
}
}

- protected static void addSavedURL(URL url, String filename) {
+ static void addSavedURL(URL url, String filename) {
synchronized(savedURLs) {
if (!savedURLs.containsKey(url)) savedURLs.put(url, filename);
}

Modified: speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceReWriter.java
===================================================================
--- speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceReWriter.java 2015-08-06 21:00:44 UTC (rev 46728)
+++ speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceReWriter.java 2015-08-06 22:17:00 UTC (rev 46729)
@@ -16,11 +16,10 @@
* @author Andr&eacute; van Toly
* @version $Id$
*/
-public final class ResourceReWriter extends ResourceWriter {
+final class ResourceReWriter extends ResourceWriter {
private static final Logger log = Logging.getLoggerInstance(ResourceReWriter.class);

private URL url;
- private HttpURLConnection huc = null;
private static String filename;
//private static int contenttype;

@@ -31,7 +30,7 @@
public ResourceReWriter(URL url) throws IOException {
super(url);
//log.debug("Trying to download... " + url.toString() + " to " + filename);
- this.huc = super.huc;
+ HttpURLConnection huc = super.huc;

this.url = getUrl();
contenttype = MMGet.contentType(huc);
@@ -41,7 +40,7 @@
/**
* Saves it.
*/
- protected void write() throws IOException {
+ void write() throws IOException {
rewrite();
// MMGet.savedURLs.put(url, filename);
MMGet.addSavedURL(url, filename);

Modified: speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceWriter.java
===================================================================
--- speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceWriter.java 2015-08-06 21:00:44 UTC (rev 46728)
+++ speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/ResourceWriter.java 2015-08-06 22:17:00 UTC (rev 46729)
@@ -13,13 +13,13 @@
* @author Andr&eacute; van Toly
* @version $Id$
*/
-public class ResourceWriter {
+class ResourceWriter {
private static final Logger log = Logging.getLoggerInstance(ResourceWriter.class);

private URL url;
- protected HttpURLConnection huc = null;
- protected static String filename;
- protected static int contenttype;
+ HttpURLConnection huc = null;
+ private static String filename;
+ static int contenttype;

/**
* Constructs writer.
@@ -41,19 +41,19 @@
filename = makeFilename(this.url);
}

- protected URL getUrl() {
+ URL getUrl() {
return huc.getURL();
}

- protected String getFilename() {
+ String getFilename() {
return filename;
}

- protected int getContentType() {
+ int getContentType() {
return contenttype;
}

- protected void disconnect() {
+ void disconnect() {
if (huc != null) {
//log.debug("disconnecting... " + url.toString());
huc.disconnect();
@@ -63,7 +63,7 @@
/**
* Saves it.
*/
- protected void write() throws IOException {
+ void write() throws IOException {
File f = getFile(filename);

if (f.exists()) {
@@ -139,7 +139,7 @@
* @param path the exact path from the startposition of the export (that's seen as 'root')
* @return file
*/
- public File getFile(String path) {
+ File getFile(String path) {
File f;
String resource;

@@ -171,7 +171,7 @@
* @param url of resource
* @return path and filename that can be saved (f.e. pics/button.gif)
*/
- public String makeFilename(URL url) {
+ String makeFilename(URL url) {
String link = url.toString();
link = MMGet.removeSessionid(link);


Modified: speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/UrlReaders.java
===================================================================
--- speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/UrlReaders.java 2015-08-06 21:00:44 UTC (rev 46728)
+++ speeltuin/andre/mmget/src/main/java/org/mmbase/mmget/UrlReaders.java 2015-08-06 22:17:00 UTC (rev 46729)
@@ -14,18 +14,17 @@
* @author Andr&eacute; van Toly
* @version $Id$
*/
-public class UrlReaders {
+class UrlReaders {
private static final Logger log = Logging.getLoggerInstance(UrlReaders.class);
-
- protected static UrlReader reader;
+
protected URL url = null;
- protected static int contenttype = -1;

public static UrlReader getUrlReader(URL url) throws IOException {

HttpURLConnection huc = (HttpURLConnection)url.openConnection();
- contenttype = MMGet.contentType(huc);
-
+ int contenttype = MMGet.contentType(huc);
+
+ UrlReader reader;
if (contenttype == MMGet.CONTENTTYPE_CSS) {
reader = new CSSReader(huc);
} else {

Reply all
Reply to author
Forward
0 new messages