javascript - How can I change this PhantomJS script to NodeJS for web scraping -


This PhantomJS script scrapes the HTML DOM of a web page. It waits until the DOM is ready before reading it.

// scrap_phantom.js
//
// PhantomJS script: starts PhantomJS's built-in web server and, on an incoming
// request, opens `url` in a headless page, waits until the document reports
// readyState "complete", writes the page's full HTML to the response, and
// then exits PhantomJS.
var server = require("webserver").create();
var page = require("webpage").create();
var port = require('system').env.port || 3000;
var url = "http://www.example.com";

server.listen(port, function (request, response) {
    // Sends the rendered document's outer HTML back to the client and
    // terminates the PhantomJS process.
    function onPageReady() {
        // In PhantomJS, page.evaluate() is synchronous and returns the value
        // produced inside the page context.
        var htmlContent = page.evaluate(function () {
            return document.documentElement.outerHTML;
        });
        response.write(htmlContent);
        response.close();
        phantom.exit();
    }

    page.open(url, function (status) {
        // Polls document.readyState until it is "complete".
        function checkReadyState() {
            // No delay argument is passed, so each re-check is scheduled
            // to run as soon as possible.
            setTimeout(function () {
                var readyState = page.evaluate(function () {
                    return document.readyState;
                });
                if ("complete" === readyState) {
                    onPageReady();
                } else {
                    checkReadyState();
                }
            });
        }
        checkReadyState();
    });
});

I'm testing it from the command line with "phantomjs scrap_phantom.js". The above code works. Now, I changed the code into a Node.js script like this:

// scrap_node.js
//
// Node.js port of the PhantomJS script, using the phantomjs-node ("phantom")
// bridge. NOTE: this is the version posted in the question and it does NOT
// work as written (the request just keeps loading). The logic is reproduced
// unchanged on purpose; only formatting and identifier casing are restored.
var http = require("http");
var phantom = require('phantom');
var url = "http://www.example.com";

http.createServer(function(request, response) {

    // Intended to send the page's outer HTML to the client and shut down.
    function onPageReady() {
        var htmlContent = page.evaluate(function () {
            return document.documentElement.outerHTML;
        });
        response.write(htmlContent);
        response.close();
        phantom.exit();
    }

    phantom.create(function (ph) {
        return ph.createPage(function (page) {
            page.open(url, function (status) {
                // Intended to poll document.readyState until "complete".
                function checkReadyState() {
                    setTimeout(function () {
                        var readyState = page.evaluate(function () {
                            return document.readyState;
                        });
                        if ("complete" === readyState) {
                            onPageReady();
                        } else {
                            checkReadyState();
                        }
                    });
                }
                checkReadyState();
            });
        });
    }, {
        dnodeOpts: {weak: false}
    });
}).listen(3000);

I'm testing it from the command line with "node scrap_node.js". This code does not work for me. It keeps loading for a long time and doesn't return any errors. Why is it not working for me?

There are multiple issues with your code.

phantomjs-node is a bridge between Node.js and PhantomJS. It uses a different syntax, and none of its functions are synchronous. This means that if you write this in PhantomJS:

// PhantomJS: page.evaluate() returns the page-context result directly;
// extra arguments after the function are passed into it.
var result = page.evaluate(function(arg1, arg2){
    //...1
    return stuff;
}, "arg1", "arg2");
//...2

then the equivalent in phantomjs-node (see "Functionality details" in its documentation) is this:

// phantomjs-node: the result is delivered to a callback given as the second
// argument; the arguments for the evaluated function follow the callback.
page.evaluate(function(arg1, arg2){
    //...1
    return stuff;
}, function(result){
    //...2
}, "arg1", "arg2");

It is inherently asynchronous.

The other thing is that phantom doesn't have an exit function, but ph does.

Furthermore, setTimeout(function(){...}) isn't doing anything useful. You need to pass a timeout value for it to be useful.


Comments

Popular posts from this blog

android - Gradle sync Error:Configuration with name 'default' not found -

java - Android studio start fail: Fatal error initializing 'null' -

html - jQuery UI Sortable - Remove placeholder after item is dropped -