任务在第二次迭代时未完成 [间歇性问题] - async/await、Task

2023-09-03 06:06:34 作者:误入情途

我在一个 Windows 窗体项目中使用了 WebBrowser 控件。它会依次导航 "MyTableTest.html" 中列出的所有 URL。该页面中有四个 URL,WebBrowser 会逐个访问它们;访问完最后一个之后,应当重新回到第一个。第一轮迭代一切正常,但在第二轮迭代时不会再导航到这些 URL。这是一个间歇性问题——有时它又能正常工作。

从日志来看,被 await 的任务似乎没有完成。怎样才能让它在第二轮迭代中也正常工作?

注:MyTableTest.html下面给出

注:本文代码基于帖子 "Get ReadyState from WebBrowser control without DoEvents"。

问题

       startNavigation();

       WriteLogFunction("Location 1");

       // 等待 DOM onload 事件;如果被取消则抛出异常
       await onloadTcs.Task;

       // 问题:第二次导航时无法到达此位置
       WriteLogFunction("Location 2");
 

code

 公共部分类Form1中:形态
{
    公共Form1中()
    {
        的InitializeComponent();
        this.Load + = MainForm_Load;
    }

    名单<字符串> visitedProducts =新的名单,其中,串>();
    字符串nextNavigationUrl =的String.Empty;

    //窗体的Load事件处理程序
    异步无效MainForm_Load(对象发件人,EventArgs的)
    {
        //取消整个操作20秒
        VAR CTS =新CancellationTokenSource(20000);

        //urlStore.Add(@"C:\Samples_L\MyTableTest.html);
        nextNavigationUrl = GetHomoePageUrl();
        等待NavigateInLoopAsync(cts.Token);
    }

    //定位到每个URL在一个循环
    异步任务NavigateInLoopAsync(的CancellationToken克拉)
    {

        布尔isIterationComplete = FALSE;

        而(!isIterationComplete)
        {

            字符串的URL =的String.Empty;

            如果(String.IsNullOrEmpty(nextNavigationUrl))
            {
                WriteLogFunction(关闭);
                isIterationComplete = TRUE;
            }
            其他
            {
                URL = nextNavigationUrl;
                ct.ThrowIfCancellationRequested();


                WriteLogFunction(调用NavigateAsync);

                行动startNavigation =()=> this.webBrowser1.Navigate(URL);
                VAR HTML =等待NavigateAsync(CT,startNavigation);


            }
        }
    }

    //异步导航
    异步任务<字符串> NavigateAsync(的CancellationToken克拉,动作startNavigation)
    {
        VAR onloadTcs =新TaskCompletionSource<布尔>();
        事件处理程序onloadEventHandler = NULL;

        WriteLogFunction(内部功能NavigateAsync);

        WebBrowserDocumentCompletedEventHandler documentCompletedHandler =委托
        {
            // DocumentCompleted可称为几个时间在同一页,
            //如果页面框架
            如果(onloadEventHandler!= NULL)
                返回;

            //所以,观察DOM onload事件,以确保文档完全加载
            onloadEventHandler =(S,E)=>
                onloadTcs.TrySetResult(真正的);
            this.webBrowser1.Document.Window.AttachEventHandler(的onload,onloadEventHandler);
        };

        this.webBrowser1.DocumentCompleted + = documentCompletedHandler;

        尝试
        {
            使用(ct.Register(()=> onloadTcs.TrySetCanceled(),useSynchronizationContext:真))
            {
                startNavigation();

                WriteLogFunction(位置1);

                //等待DOM onload事件,抛出如果取消
                等待onloadTcs.Task;

                //问题:没有达到这个位置,在第二次航行
                WriteLogFunction(位置2);
            }
        }
        最后
        {
            this.webBrowser1.DocumentCompleted  -  = documentCompletedHandler;
            如果(onloadEventHandler!= NULL)
                this.webBrowser1.Document.Window.DetachEventHandler(的onload,onloadEventHandler);
        }

        WriteLogFunction(放置3);

        //该页面已完全加载,现在

        //可选:让网页运行其动态的AJAX的code,
        //我们可能会添加其他超时这个循环
        做{等待Task.Delay(500克拉); }
        而(this.webBrowser1.IsBusy);

        //呼叫处理 - 添加者Lijo
        ExerciseApp(this.webBrowser1,NULL);

        //返回页面的HTML内容
        返回this.webBrowser1.Document.GetElementsByTagName(HTML)[0] .OuterHtml;
    }

    私人无效ExerciseApp(对象发件人,WebBrowserDocumentCompletedEventArgs E)
    {
        WriteLogFunction(ExerciseApp);
        变种WB =发件人的web浏览器;
        INT catalogElementIterationCounter = 0;
        VAR elementsToConsider = wb.Document.All;
        字符串PRODUCTURL =的String.Empty;
        布尔isClicked = FALSE;

        的foreach(的HtmlElement E1在elementsToConsider)
        {

            catalogElementIterationCounter ++;

            字符串x = e1.TagName;
            字符串IDSTR = e1.GetAttribute(ID);

            如果(!String.IsNullOrWhiteSpace(IDSTR))
            {
                //每个产品导航
                如果(idStr.Contains(catalogEntry_img))
                {
                    PRODUCTURL = e1.GetAttribute(HREF);
                    如果(!visitedProducts.Contains(PRODUCTURL))
                    {
                        WriteLogFunction(PRODUCTURL  - + PRODUCTURL);
                        visitedProducts.Add(PRODUCTURL);
                        isClicked = TRUE;

                        //e1.InvokeMember("Click);
                        nextNavigationUrl = PRODUCTURL;

                        打破;
                    }

                }
            }
        }

        WriteLogFunction(visitedProducts.Count.ToString());
        WriteLogFunction(nextNavigationUrl);

        如果(visitedProducts.Count == 4)
        {
            WriteLogFunction(条件B);
            visitedProducts =新的名单,其中,串>();
        }

        如果(!isClicked)
        {
            WriteLogFunction(条件C);
            nextNavigationUrl = GetHomoePageUrl();
        }
    }

    私人无效HomoePageNavigate()
    {
        WebBrowser1.Navigate时(GetHomoePageUrl());
    }

    私人字符串GetHomoePageUrl()
    {
       返回@C:\ Samples_L \ MyTableTest.html;
    }

    私人无效WriteLogFunction(字符串strMessage)
    {
        使用(StreamWriter的W = File.AppendText(log.txt中))
        {
            w.WriteLine(\ r \ N {0} ... {1},DateTime.Now.ToLongTimeString(),strMessage);
        }
    }



}
 
同步任务 异步任务 宏任务 微任务 任务的执行过程实例详解 setTimeout 是同步还是异步

MyTableTest.html

 < HTML>
< HEAD>

    <风格类型=文本/ CSS>
        表 {
            边界:2px的蓝色实;
        }

        TD {
            边界:1px的固体水鸭;
        }
    < /风格>

< /头>
<身体GT;

    <表ID =四格>
         &其中; TR>
            < TD>
                < A HREF =htt​​ps://www.wikipedia.org/ID =catalogEntry_img63666>

                    &所述; IMG SRC =SSSS
                        ALT =BWIDTH =70/>
                &所述; / a取代;
            < / TD>
            < TD>
                < A HREF =htt​​p://www.keralatourism.org/ID =catalogEntry_img63667>

                    &所述; IMG SRC =SSSS
                        ALT =AWIDTH =70/>
                &所述; / a取代;
            < / TD>
        < / TR>
        &其中; TR>
            < TD>
                < A HREF =htt​​p://stackoverflow.com/users/696627/lijoID =catalogEntry_img63664>

                    &所述; IMG SRC =SSSS
                        ALT =GWIDTH =70/>
                &所述; / a取代;
            < / TD>
            < TD>
                < A HREF =htt​​p://msdn.microsoft.com/en-US/#fbid=zgGLygxrE84ID =catalogEntry_img63665>

                    &所述; IMG SRC =SSSS
                        ALT =YWIDTH =70/>
                &所述; / a取代;
            < / TD>
        < / TR>

    < /表>
< /身体GT;

< / HTML>
 

解决方案   

"Location 1" 被打印出来了,但 "Location 2" 没有。您有什么建议来解决这个问题?您能重现这个问题吗?

我还没有尝试重现,但正如我在评论中所说,很明显,对于那个特定的 URL,DocumentCompleted 或 window.onload 没有被触发。如果页面无法完全加载,或者 URL 无效、服务器无法访问,就可能出现这种情况。超时逻辑正是为此而设的。

在 documentCompletedHandler 的开头再加一条跟踪日志,并在 onloadEventHandler 内部也加一条:

  onloadEventHandler = (s, e) => { Log("inside onloadEventHandler");
    onloadTcs.TrySetResult(true); }
 

看看哪些事件被触发了,哪些没有。

此外,您还应该实现 WebBrowser 的功能控制(Feature Control),以启用现代 IE 的功能。这通常也会影响网页的加载方式。可以从这里复制 SetFeatureBrowserEmulation。

I have a WebBrowser control in a Windows Forms project. It navigates through all the URLs available in "MyTableTest.html". There are four URLs in this page and the WebBrowser goes through each one – one by one. Once it reaches the last one it should go to the first again. It works fine in the first iteration – but it does not go to the URLs in the second iteration. This is an intermittent issue - at certain times it works.

It seems (from the log) that the awaited task is not completed. What can be done to make it work in the second iteration as well?

Note: MyTableTest.html is given below

Note: This is based on the post Get ReadyState from WebBrowser control without DoEvents

Issue

       // start the navigation (excerpt from NavigateAsync below)
       startNavigation();

       WriteLogFunction("Location 1");

       // wait for the DOM onload event; the await throws if the token is cancelled
       await onloadTcs.Task;

       // ISSUE: this line is not reached during the second pass through the URLs
       WriteLogFunction("Location 2");

Code

/// <summary>
/// Main form. Drives <c>webBrowser1</c> through every product link found on
/// the home page (see <see cref="GetHomoePageUrl"/>), over and over, logging
/// each step to log.txt.
/// </summary>
public partial class Form1 : Form
{
    // Upper bound for loading a single page. The navigation loop below never
    // ends on its own, so a timeout covering the whole loop would always fire
    // part-way through a cycle; the limit is therefore applied per navigation.
    private const int NavigationTimeoutMs = 20000;

    // Number of product links on the home page; once this many have been
    // visited the cycle starts over.
    private const int ProductsPerCycle = 4;

    public Form1()
    {
        InitializeComponent();
        this.Load += MainForm_Load;
    }

    // Product URLs already visited in the current cycle.
    List<string> visitedProducts = new List<string>();

    // URL the loop will navigate to next; set by ExerciseApp after each page.
    string nextNavigationUrl = String.Empty;

    /// <summary>
    /// Form Load event handler: starts the navigation loop and keeps it
    /// running until the form is closed.
    /// </summary>
    async void MainForm_Load(object sender, EventArgs e)
    {
        using (var cts = new CancellationTokenSource())
        {
            // Stop the loop when the form goes away.
            FormClosedEventHandler cancelOnClose = (s, args) => cts.Cancel();
            this.FormClosed += cancelOnClose;

            try
            {
                nextNavigationUrl = GetHomoePageUrl();
                await NavigateInLoopAsync(cts.Token);
            }
            catch (OperationCanceledException)
            {
                // An exception escaping an async void handler cannot be
                // observed by any caller, so cancellation is handled here.
                WriteLogFunction("Navigation loop cancelled");
            }
            finally
            {
                this.FormClosed -= cancelOnClose;
            }
        }
    }

    /// <summary>
    /// Navigates to <c>nextNavigationUrl</c> repeatedly until it becomes empty
    /// or <paramref name="ct"/> is cancelled.
    /// </summary>
    /// <param name="ct">Cancels the whole loop.</param>
    /// <exception cref="OperationCanceledException">
    /// Thrown when <paramref name="ct"/> is cancelled.
    /// </exception>
    async Task NavigateInLoopAsync(CancellationToken ct)
    {
        while (!String.IsNullOrEmpty(nextNavigationUrl))
        {
            ct.ThrowIfCancellationRequested();

            // Copy to a local so the lambda is not affected by later changes
            // to the field.
            string url = nextNavigationUrl;

            WriteLogFunction("Calling NavigateAsync");

            Action startNavigation = () => this.webBrowser1.Navigate(url);

            // Each navigation gets its own time budget, linked to the outer
            // token so that closing the form still cancels immediately.
            using (var navigationCts = CancellationTokenSource.CreateLinkedTokenSource(ct))
            {
                navigationCts.CancelAfter(NavigationTimeoutMs);

                try
                {
                    await NavigateAsync(navigationCts.Token, startNavigation);
                }
                catch (OperationCanceledException)
                {
                    if (ct.IsCancellationRequested)
                    {
                        throw;
                    }

                    // Only this page timed out: record it and carry on from
                    // the home page instead of stalling the whole loop.
                    WriteLogFunction("Navigation timed out: " + url);
                    nextNavigationUrl = GetHomoePageUrl();
                }
            }
        }

        WriteLogFunction("Close");
    }

    /// <summary>
    /// Starts a navigation, waits for the DOM onload event of the loaded
    /// document, lets the page settle, runs <see cref="ExerciseApp"/> on it
    /// and returns the page's HTML.
    /// </summary>
    /// <param name="ct">Cancels the wait for this navigation.</param>
    /// <param name="startNavigation">Delegate that kicks off the navigation.</param>
    /// <returns>The outer HTML of the document's html element.</returns>
    /// <exception cref="OperationCanceledException">
    /// Thrown when <paramref name="ct"/> is cancelled before the page has loaded.
    /// </exception>
    async Task<string> NavigateAsync(CancellationToken ct, Action startNavigation)
    {
        var onloadTcs = new TaskCompletionSource<bool>();
        EventHandler onloadEventHandler = null;

        // The window the onload handler was attached to. It is remembered so
        // the handler is detached from that same window afterwards, without
        // having to go through webBrowser1.Document again (which may be null
        // or refer to another document by then).
        HtmlWindow onloadWindow = null;

        WriteLogFunction("Inside Function NavigateAsync");

        WebBrowserDocumentCompletedEventHandler documentCompletedHandler = delegate
        {
            // DocumentCompleted may be called several times for the same page,
            // if the page has frames
            if (onloadEventHandler != null)
            {
                return;
            }

            // so, observe DOM onload event to make sure the document is fully loaded
            onloadWindow = this.webBrowser1.Document.Window;
            onloadEventHandler = (s, e) => onloadTcs.TrySetResult(true);
            onloadWindow.AttachEventHandler("onload", onloadEventHandler);
        };

        this.webBrowser1.DocumentCompleted += documentCompletedHandler;

        try
        {
            using (ct.Register(() => onloadTcs.TrySetCanceled(), useSynchronizationContext: true))
            {
                startNavigation();

                WriteLogFunction("Location 1");

                // wait for DOM onload event, throw if cancelled
                await onloadTcs.Task;

                WriteLogFunction("Location 2");
            }
        }
        finally
        {
            this.webBrowser1.DocumentCompleted -= documentCompletedHandler;
            if (onloadWindow != null)
            {
                onloadWindow.DetachEventHandler("onload", onloadEventHandler);
            }
        }

        WriteLogFunction("Place 3");

        // the page has fully loaded by now

        // optional: let the page run its dynamic AJAX code;
        // the delay observes ct, so this loop ends when the token is cancelled
        do { await Task.Delay(500, ct); }
        while (this.webBrowser1.IsBusy);

        // pick the next URL to visit based on the page that was just loaded
        ExerciseApp(this.webBrowser1, null);

        // return the page's HTML content
        return this.webBrowser1.Document.GetElementsByTagName("html")[0].OuterHtml;
    }

    /// <summary>
    /// Scans the loaded document for the first element whose id contains
    /// "catalogEntry_img" and whose href has not been visited yet, and makes
    /// that href the next navigation target. When no such element exists the
    /// next target is the home page.
    /// </summary>
    /// <param name="sender">The <see cref="WebBrowser"/> holding the document.</param>
    /// <param name="e">Not used.</param>
    private void ExerciseApp(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        WriteLogFunction("ExerciseApp");
        var wb = (WebBrowser)sender;
        bool isClicked = false;

        foreach (HtmlElement e1 in wb.Document.All)
        {
            string idStr = e1.GetAttribute("id");
            if (String.IsNullOrWhiteSpace(idStr) || !idStr.Contains("catalogEntry_img"))
            {
                continue;
            }

            // Each Product Navigation
            string productUrl = e1.GetAttribute("href");
            if (visitedProducts.Contains(productUrl))
            {
                continue;
            }

            WriteLogFunction("productUrl -- " + productUrl);
            visitedProducts.Add(productUrl);
            isClicked = true;
            nextNavigationUrl = productUrl;
            break;
        }

        WriteLogFunction(visitedProducts.Count.ToString());
        WriteLogFunction(nextNavigationUrl);

        if (visitedProducts.Count == ProductsPerCycle)
        {
            // every product has been visited: start a new cycle
            WriteLogFunction("Condition B");
            visitedProducts.Clear();
        }

        if (!isClicked)
        {
            // nothing new on this page: go back to the home page
            WriteLogFunction("Condition C");
            nextNavigationUrl = GetHomoePageUrl();
        }
    }

    /// <summary>Navigates the browser straight to the home page.</summary>
    private void HomoePageNavigate()
    {
        webBrowser1.Navigate(GetHomoePageUrl());
    }

    /// <summary>Returns the path of the page that lists the product links.</summary>
    private string GetHomoePageUrl()
    {
       return @"C:\Samples_L\MyTableTest.html";
    }

    /// <summary>Appends a time-stamped line to log.txt.</summary>
    /// <param name="strMessage">Text to log.</param>
    private void WriteLogFunction(string strMessage)
    {
        using (StreamWriter w = File.AppendText("log.txt"))
        {
            w.WriteLine("\r\n{0} ..... {1} ", DateTime.Now.ToLongTimeString(), strMessage);
        }
    }
}

MyTableTest.html

<html>
<head>

    <style type="text/css">
        table {
            border: 2px solid blue;
        }

        td {
            border: 1px solid teal;
        }
    </style>

</head>
<body>

    <!-- Home page used by Form1. ExerciseApp looks for elements whose id
         contains "catalogEntry_img" and navigates to their href; the four
         anchors below are those product links (the code restarts its cycle
         after four visited URLs). -->
    <table id="four-grid">
         <tr>
            <td>
                <a href="https://www.wikipedia.org/" id="catalogEntry_img63666">

                    <!-- src="ssss" is presumably a placeholder; only the anchor's id/href are read by the C# code -->
                    <img src="ssss"
                        alt="B" width="70" />
                </a>
            </td>
            <td>
                <a href="http://www.keralatourism.org/" id="catalogEntry_img63667">

                    <img src="ssss"
                        alt="A" width="70" />
                </a>
            </td>
        </tr>
        <tr>
            <td>
                <a href="http://stackoverflow.com/users/696627/lijo" id="catalogEntry_img63664">

                    <img src="ssss"
                        alt="G" width="70" />
                </a>
            </td>
            <td>
                <a href="http://msdn.microsoft.com/en-US/#fbid=zgGLygxrE84" id="catalogEntry_img63665">

                    <img src="ssss"
                        alt="Y" width="70" />
                </a>
            </td>
        </tr>

    </table>
</body>

</html>

解决方案

"Location 1" is printed but not "Location 2". What is your suggestion to overcome this? Were you able to reproduce this issue?

I haven't tried to repro, but like I said in the comment, it's obvious that either DocumentCompleted or window.onload is not getting fired for that particular URL. That's possible if the page cannot be fully loaded, or e.g. if the URL is invalid or the server becomes unreachable. That's what the time-out logic is for.

Put one more trace at the beginning of documentCompletedHandler, and one more inside onloadEventHandler:

// Trace the moment the DOM onload event fires, then complete the awaited task.
onloadEventHandler = (s, e) =>
{
    WriteLogFunction("inside onloadEventHandler");
    onloadTcs.TrySetResult(true);
};

See what gets fired and what doesn't.

Besides, you should also implement WebBrowser Feature Control to enable the modern IE features. This also often affects how web pages are loaded. Copy SetFeatureBrowserEmulation from here.