CasperJS Ajax 调用 - waitForResource 与解析错误(CasperJS、Ajax、waitForResource)

2023-09-10 18:04:32 作者:孤城凉梦

CasperJS 是一个很棒的工具,我已经掌握了一些基础知识。不过,我想解决两个问题。首先,出于闭包的考虑,我把代码拆分成了几个函数,结果遇到了解析错误。我添加了选项 verbose: true、logLevel: "debug",想看看它是否会指出行号,但没有得到任何结果。其次,我想发起一个异步的 Ajax 调用。我目前使用的是 waitFor,但我读到过一篇文章介绍了更高效的方法。如何设置 waitForResource() 来跟踪 AJAX 请求?以及如何显示解析错误所在的行?

code

var urls = ['http://9gag.tv/'];
var casper = require('casper').create({
    verbose: true,
    logLevel: "debug"
});

function getNumberOfItems(casper) {
    return casper.getElementsInfo(".listview .badge-grid-item").length;
}

function tryAndScroll(casper) {
  casper.page.scrollPosition = { top: casper.page.scrollPosition["top"] + 4000, left: 0 };
  var info = casper.getElementInfo('.badge-post-grid-load-more');
  if (info.visible) {
    var curItems = getNumberOfItems(casper);
    if( curItems <= 60 ) {
        casper.waitFor(function check(){
          return curItems != getNumberOfItems(casper);
        }, function then(){
          tryAndScroll(this);
        }, function onTimeout(){
          this.echo("Timout reached");
        }, 10000);
    }
  } else {
    casper.echo("no more items");
  }

}

function combineArrays(x, y) {
    var result = [];
    for(var i = 0, i < page_links.length; i++) {
        result[i] = {};
        result[i].page_link = x[i];
        result[i].video_link = y[i];
    }
    return result;
}

function linkScraper(x){
    var page_links = [];
    var youtube = [];
    for (var i = 0; i < x.length; i++)
    { // start for loop
        casper.thenOpen(x[i], function() {
            //Scroll down for elements
            tryAndScroll(this);
            casper.then(function() {
              this.getElementsInfo('.title').forEach(function(element) {
                // skip elements that don't have a href attribute...
                if (!element.attributes.href) {
                  return;
                }
                page_links.push( element["attributes"]["href"] );
                casper.thenOpen(element.attributes.href, function() {
                  this.click('.responsivewrapper');
                }).then(function(){
                  casper.each(this.getElementsInfo('.badge-youtube-player'), function(casper, element, j) {
                    youtube.push( element["attributes"]["src"] );
                  });
                })localhost;
              });
            });
        });
    }
    return combineArrays(page_links,youtube);
}

function stringifyResult(webpages){
    //Pass link array to linkScraper
    var linksArr = linkScraper.call(this, webpages);
    //send results to php page
     server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
     this.waitFor( function() {
         response = this.evaluate(function() {
            $.ajax({
                type: "POST",
                url: server,
                data: JSON.stringify(linksArr),
                //dataType: 'json',
                contentType: "application/json",
                success: function (data) {
                    this.echo("All done.");
                    return this.exit();
                    //return data.responseText;
                },
                error: function (xhr,status,error){
                    return this.echo(error);
                }
            });
         });
     });
}



casper.start().then(function() {
    this.echo("Starting");
});
casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X)');
casper.run(stringifyResult.call(this, urls));
 

解决方案

你的代码有很多问题。

1。语法错误

CasperJS 和 PhantomJS 本身不会告诉你语法错误出在哪里,但它很可能在 for(var i = 0, i < page_links.length; i++) { 这一行。把 , 改成 ;。另外,page_links 在这一行中并未定义,你的本意可能是 x。今后请记住:CasperJS 脚本就是普通的 JavaScript,你可以使用 jslint.com 之类的在线工具来找出这类 bug(以及代码中的其他问题)。

另外还有一个放错位置的 localhost。

2. this

this 有多种含义,取决于它所处的位置。

a) 全局 this

就拿最后一行:

casper.run(stringifyResult.call(this, urls));
 
angular能用ajax吗,在AngularJS中使用AJAX的方法

这里的 this 位于所有函数之外,因此它指向 window(是的,有两个独立的 window 对象,一个在页面上下文内部,一个在外部)。它并不指向你可能期望的 casper。请改用:

casper.run(stringifyResult.call(casper, urls));
 

(^ 非最终代码:见第 6 点。)

b) jQuery 回调

下面这段代码中的 this:

success: function (data) {
    this.echo("All done.");
    return this.exit();
},
 

指向的是 jQuery 的 jqXHR 对象(顺便说一句,你也无法在函数内部从异步回调中返回任何东西)。它与 CasperJS 毫无关系。此外,在页面上下文中(即 casper.evaluate() 内部)无法调用 CasperJS 的函数,因为页面上下文是沙箱化的,它无法访问在外部定义的变量(包括 linksArr 和 server)。更多信息请参见这里。

3. 不必要的 waitFor

你的 waitFor 永远不会结束,不过你可能是有意这样设计的,以便发送 AJAX 请求。问题在于,同一个有问题的请求会每 20 毫秒发送一次。

4。发送Ajax请求

CasperJS 提供了一个在页面上下文中发送 AJAX 请求的工具函数:__utils__.sendAJAX()。它默认会阻塞执行,因此无需以带外(out-of-band)的方式等待请求完成。

function stringifyResult(webpages){
    var linksArr = linkScraper.call(this, webpages);

    //send results to php page
    server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
    this.evaluate(function(server, linksArr){
        __utils__.sendAJAX(server, "POST", JSON.stringify(linksArr));
    }, server, linksArr);
}
 

(^ 非最终代码:见第 5 点。)

5。从异步函数返回

所有 then* 和 wait* 系列的 CasperJS 函数都是异步的。调用它们时,你只是安排相应的步骤在当前步骤结束时执行。

这意味着你无法从 linkScraper 返回结果,因为它包含异步代码。或者说,至少你无法返回最终结果,因为这些结果是由异步代码填充的。不过,你可以返回最终会包含结果的数组。

一种可能的修复方法是把合并数组的调用移到 linkScraper 之外:

function linkScraper(x){
    var page_links = [];
    var youtube = [];
    // here are asynchronous calls
    return {pl: page_links, yt: youtube};
}
function stringifyResult(webpages){
    var linksObj = linkScraper.call(this, webpages);
    // here linksObj contains empty lists
    this.then(function(){
        // here linksObj contains populated lists
        var linksArr = combineArrays(linksObj.pl, linksObj.yt);

        server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
        this.evaluate(function(server, linksArr){
            __utils__.sendAJAX(server, "POST", JSON.stringify(linksArr));
        }, server, linksArr);
    });
}
 

6. exit

casper.exit() 会立即生效。因此,一旦调用它,执行就会停止。调用时必须小心,因为此时可能仍有已安排但尚未执行的步骤。

有一种方法可以避免调用 casper.exit():不要向 casper.run() 提供回调。如果你提供了这样的回调,那么在所有步骤执行完毕后将不会自动退出。

casper.then(stringifyResult.call(casper, urls)).run();
 

Casperjs is a great tool. I have been able to understand certain basics. However, I am trying to tackle two issues. First, I split my code into several functions for closure reasons. I am getting a parse error. I have added the option to verbose: true, logLevel: "debug" and see if it points out a line number but I get no results. Second, I want to make an ajax call, which are async. I am using waitFor but I read THIS article for a more efficient way. How can I set waitForResource() to track AJAX requests? and how to display parse error lines?

CODE

// URLs handed to stringifyResult() at the bottom of the script.
var urls = ['http://9gag.tv/'];
// CasperJS instance created with verbose output and debug-level logging.
var casper = require('casper').create({
    verbose: true,
    logLevel: "debug"
});

/**
 * Count the elements currently matching ".listview .badge-grid-item".
 *
 * @param {Object} casper - CasperJS instance used to query the page.
 * @returns {number} Length of the list returned by getElementsInfo().
 */
function getNumberOfItems(casper) {
    var gridItems = casper.getElementsInfo(".listview .badge-grid-item");
    return gridItems.length;
}

/**
 * Scroll the page down by 4000 and, while the "load more" element is visible
 * and no more than 60 items are present, wait for the item count to change
 * and then scroll again.
 *
 * @param {Object} casper - CasperJS instance driving the page.
 */
function tryAndScroll(casper) {
  var page = casper.page;
  var nextTop = page.scrollPosition.top + 4000;
  page.scrollPosition = { top: nextTop, left: 0 };

  var loadMore = casper.getElementInfo('.badge-post-grid-load-more');
  if (!loadMore.visible) {
    casper.echo("no more items");
    return;
  }

  var countBefore = getNumberOfItems(casper);
  if (countBefore <= 60) {
    casper.waitFor(
      // Poll until the number of items differs from the count taken above.
      function check() {
        return countBefore != getNumberOfItems(casper);
      },
      // New items showed up: scroll again.
      function then() {
        tryAndScroll(this);
      },
      function onTimeout() {
        this.echo("Timout reached");
      },
      10000
    );
  }
}

// Builds a list of {page_link, video_link} objects, taking page_link from x
// and video_link from y at the same index.
function combineArrays(x, y) {
    var result = [];
    // NOTE(review): this loop header does not parse — the first separator is
    // `,` where `;` is required. `page_links` is also not declared in this
    // function (it is a local of linkScraper); the bound was presumably meant
    // to be x.length.
    for(var i = 0, i < page_links.length; i++) {
        result[i] = {};
        result[i].page_link = x[i];
        result[i].video_link = y[i];
    }
    return result;
}

// Visits every URL in x, collects the href of each '.title' element into
// page_links, opens each of those links, clicks '.responsivewrapper' and
// collects the src of every '.badge-youtube-player' element into youtube.
// Returns the result of combineArrays(page_links, youtube).
function linkScraper(x){
    var page_links = [];
    var youtube = [];
    for (var i = 0; i < x.length; i++)
    { // start for loop
        casper.thenOpen(x[i], function() {
            //Scroll down for elements
            tryAndScroll(this);
            casper.then(function() {
              this.getElementsInfo('.title').forEach(function(element) {
                // skip elements that don't have a href attribute...
                if (!element.attributes.href) {
                  return;
                }
                page_links.push( element["attributes"]["href"] );
                casper.thenOpen(element.attributes.href, function() {
                  this.click('.responsivewrapper');
                }).then(function(){
                  casper.each(this.getElementsInfo('.badge-youtube-player'), function(casper, element, j) {
                    youtube.push( element["attributes"]["src"] );
                  });
                // NOTE(review): the stray `localhost` token on the next line is
                // a syntax error.
                })localhost;
              });
            });
        });
    }
    // NOTE(review): the arrays are only filled inside the thenOpen/then
    // callbacks above; presumably those have not run yet when this line
    // executes, so the combined list would be built from empty arrays — confirm.
    return combineArrays(page_links,youtube);
}

// Scrapes the given pages via linkScraper and tries to POST the JSON-encoded
// result to the PHP endpoint with jQuery's $.ajax from inside evaluate().
// Expects to be called with a CasperJS instance as `this` (it calls
// this.waitFor and this.evaluate).
function stringifyResult(webpages){
    //Pass link array to linkScraper
    var linksArr = linkScraper.call(this, webpages);
    //send results to php page
    // NOTE(review): `server` (here) and `response` (below) are assigned without
    // `var`, so they become implicit globals.
     server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
     // NOTE(review): the function passed to waitFor has no return statement,
     // so it never reports a truthy result.
     this.waitFor( function() {
         // NOTE(review): the evaluate callback takes no arguments yet reads
         // `server` and `linksArr`, which are defined outside of it; evaluate()
         // presumably runs this in the page context where they are not
         // available — confirm against the CasperJS docs.
         response = this.evaluate(function() {
            $.ajax({
                type: "POST",
                url: server,
                data: JSON.stringify(linksArr),
                //dataType: 'json',
                contentType: "application/json",
                // NOTE(review): `this` inside these jQuery callbacks is not the
                // casper instance, so echo()/exit() are presumably not
                // reachable from here — confirm.
                success: function (data) {
                    this.echo("All done.");
                    return this.exit();
                    //return data.responseText;
                },
                error: function (xhr,status,error){
                    return this.echo(error);
                }
            });
         });
     });
}



// Start the run with a first step that only logs "Starting".
casper.start().then(function() {
    this.echo("Starting");
});
casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X)');
// NOTE(review): stringifyResult is invoked right here and its return value is
// what gets passed to run(). `this` on this line is outside of every function,
// so it is not the casper instance.
casper.run(stringifyResult.call(this, urls));

解决方案

There are many problems with your code.

1. Syntax error

CasperJS and PhantomJS itself won't show you where the syntax error is, but it is probably in line for(var i = 0, i < page_links.length; i++) {. Change , to ;. And page_links is not defined in this line. You probably meant x. For the future: CasperJS scripts are plain JavaScript. You can use online tools such as jslint.com to find such bugs (and other problems with your code).

There is also a misplaced localhost.

2. this

this has many meanings depending on where it is placed.

a) Global this

Take for example the very last line:

casper.run(stringifyResult.call(this, urls));

This this is outside of all functions. So it refers to window (yes there are two separate window objects, one inside of the page context and one outside). It doesn't refer to casper which you probably expected. Use:

casper.run(stringifyResult.call(casper, urls));

(^ non-final code: see 6.)

b) jQuery callback

The this inside of:

success: function (data) {
    this.echo("All done.");
    return this.exit();
},

refers to the jqXHR object of jQuery (btw, you also cannot return something from an asynchronous function inside a function). It has nothing to do with CasperJS. Furthermore, it is impossible to call CasperJS functions from the page context (inside of casper.evaluate()), because the page context is sandboxed. It has no access to variables defined outside (this includes linksArr and server). See this for more information.

3. Unnecessary waitFor

Your waitFor will never finish, but you probably designed it this way so that your AJAX request is sent. The problem is that the same broken request will be sent every 20 milliseconds.

4. Sending AJAX request

CasperJS provides a utility to send AJAX requests in the page context: __utils__.sendAJAX(). It also blocks the execution by default, so there is no need to wait for the request in an out-of-band fashion.

/**
 * Scrape the given pages and POST the JSON-encoded result to the PHP endpoint.
 * Must be called with a CasperJS instance as `this` (it uses this.evaluate).
 *
 * @param {Array} webpages - URLs handed on to linkScraper().
 */
function stringifyResult(webpages){
    var linksArr = linkScraper.call(this, webpages);

    //send results to php page
    // Declared with `var` so the URL stays local instead of becoming an
    // implicit global (which is also a ReferenceError in strict mode).
    var server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
    // Both values are passed as extra arguments to evaluate() and received as
    // parameters of the callback rather than read from the enclosing scope.
    this.evaluate(function(server, linksArr){
        __utils__.sendAJAX(server, "POST", JSON.stringify(linksArr));
    }, server, linksArr);
}

(^ non-final code: see 5.)

5. Returning from asynchronous function

All then* and wait* CasperJS functions are asynchronous. By calling them, you schedule the associated step to be executed at the end of the current step.

It means that you cannot return something from linkScraper, because it contains asynchronous code. Or at least you cannot return the final results, because they are populated by the asynchronous code. You can however return the arrays which will eventually contain the result.

One possible fix, would be to move the combine call outside of linkScraper:

// Skeleton of the reworked scraper: it returns the two result arrays
// themselves (as `pl` and `yt`) instead of a combined list.
function linkScraper(x){
    var page_links = [];
    var youtube = [];
    // here are asynchronous calls
    return {pl: page_links, yt: youtube};
}
/**
 * Schedule the scraping steps, then add a further step that combines the
 * collected links and POSTs them as JSON to the PHP endpoint.
 * Must be called with a CasperJS instance as `this` (it uses this.then and
 * this.evaluate).
 *
 * @param {Array} webpages - URLs handed on to linkScraper().
 */
function stringifyResult(webpages){
    var linksObj = linkScraper.call(this, webpages);
    // here linksObj contains empty lists
    this.then(function(){
        // here linksObj contains populated lists
        var linksArr = combineArrays(linksObj.pl, linksObj.yt);

        // Declared with `var` so the URL stays local instead of becoming an
        // implicit global (which is also a ReferenceError in strict mode).
        var server = "http://localhost:8181/hashtag_pull/lib/9GagPrivateApi.php";
        // Both values are passed as extra arguments to evaluate() and received
        // as parameters of the callback rather than read from this scope.
        this.evaluate(function(server, linksArr){
            __utils__.sendAJAX(server, "POST", JSON.stringify(linksArr));
        }, server, linksArr);
    });
}

6. exit

casper.exit() is immediate. So when you call it, the execution stops. You have to be careful when to call it, because it may be the case that there are still steps scheduled.

There is a way to avoid calling casper.exit() by not providing a callback to casper.run(). If you provide such a callback, there will be no automatic exit when all steps are executed.

// then() takes a step function, so the call is wrapped in a callback instead
// of being executed on the spot with its (undefined) return value passed in.
// run() is given no callback, so there is an automatic exit once all steps
// have been executed.
casper.then(function () {
    stringifyResult.call(this, urls);
}).run();

 
精彩推荐
图片推荐