I am using PhantomJS to develop a web-scraping script for a dynamic, JavaScript-driven site.
The biggest problem I have had is working with a frame. I wrote my own function to
wait for an element to appear in a frame. Is there built-in support for waiting for, and scripting against, a frame?
// in a library module file:
/**
 * Poll a page until an element matching `element_selector` exists inside the
 * document of the frame matched by `frame_selector`.
 *
 * The first check happens synchronously; later checks are scheduled with
 * setTimeout every 200 ms until the element is found or `wait_time` elapses.
 *
 * @param {Object} page - object exposing `evaluate(fn, ...args)`; presumably a
 *     PhantomJS WebPage (confirm against callers).
 * @param {string} frame_selector - CSS selector, evaluated in the top-level
 *     document, that must match a FRAME or IFRAME element.
 * @param {string} element_selector - CSS selector evaluated inside the frame's
 *     document.
 * @param {number} wait_time - maximum time to keep polling, in milliseconds.
 * @param {Object} deferred - object exposing `resolve(value)`, `reject(error)`
 *     and a `promise` property (looks like a Q-style deferred; confirm).
 * @returns {*} `deferred.promise`. The deferred is resolved with a descriptive
 *     string on success, or rejected with an Error whose message is the last
 *     progress trace on timeout.
 */
wait_for_selector_in_frame = function(page,frame_selector,element_selector,wait_time,deferred) {
    var POLL_INTERVAL_MS = 200;
    var start_time = new Date();
    var probe_frame = null;
    var poll = null;

    console.log('wait_for_selector_in_frame frame_selector ' + frame_selector + ' element_selector ' + element_selector);

    // Handed to page.evaluate, so it only uses its own arguments and the
    // page's `document`; it must not close over variables of this function.
    // Returns a progress trace; only a full success starts with 'all'
    // (every failure trace starts with a space).
    probe_frame = function(frame_selector,element_selector) {
        var progress = '';
        var frame_element = document.querySelector(frame_selector);
        var frame_content_window = null;
        var frame_content_window_document = null;

        if (!frame_element) {
            return progress + ' ' + frame_selector + ' not found';
        }
        progress = progress + ' ' + frame_selector;

        // <iframe> exposes the same contentWindow/document path as <frame>.
        if (frame_element.tagName !== 'FRAME' && frame_element.tagName !== 'IFRAME') {
            return progress + ' not a FRAME';
        }

        frame_content_window = frame_element.contentWindow;
        if (!frame_content_window) {
            return progress + ' contentWindow not found';
        }
        progress = progress + ' contentWindow';

        frame_content_window_document = frame_content_window.document;
        if (!frame_content_window_document) {
            return progress + ' contentWindow.document not found';
        }
        progress = progress + ' contentWindow.document';

        if (frame_content_window_document.querySelector(element_selector)) {
            return 'all ' + progress + ' ' + element_selector;
        }
        return progress + ' ' + element_selector + ' not found';
    };

    poll = function() {
        var found = page.evaluate(probe_frame,frame_selector,element_selector);

        // evaluate may hand back null/undefined (e.g. the in-page callback
        // threw); treat that as "not found yet" instead of crashing on
        // found.indexOf and leaving the deferred pending forever.
        if (typeof found !== 'string') {
            found = ' page.evaluate returned no result';
        }

        if (found.indexOf('all') === 0) {
            console.log('found selector ' + found);
            deferred.resolve('found element in frame ' + frame_selector + ' ' + element_selector);
            return;
        }

        if (new Date() - start_time > wait_time) {
            console.log('timeout waiting for selector ' + found);
            deferred.reject(new Error(found));
            return;
        }

        setTimeout(poll,POLL_INTERVAL_MS);
    };

    poll();
    return deferred.promise;
};