-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnl2we.html
More file actions
1566 lines (1135 loc) · 110 KB
/
nl2we.html
File metadata and controls
1566 lines (1135 loc) · 110 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="zh-CN">
<head hexo-theme='https://github.com/volantis-x/hexo-theme-volantis/tree/4.3.1'>
<meta charset="utf-8">
<!-- SEO相关 -->
<!-- 渲染优化 -->
<meta http-equiv='x-dns-prefetch-control' content='on' />
<link rel='dns-prefetch' href='https://cdn.jsdelivr.net'>
<link rel="preconnect" href="https://cdn.jsdelivr.net" crossorigin>
<meta name="renderer" content="webkit">
<meta name="force-rendering" content="webkit">
<meta http-equiv="X-UA-Compatible" content="IE=Edge,chrome=1">
<meta name="HandheldFriendly" content="True" >
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<link rel="preload" href="/css/first.css" as="style">
<!-- 页面元数据 -->
<title>[论文解读]Mapping natural language commands to web elements - lornd's blog</title>
<!-- feed -->
<!-- import meta -->
<!-- link -->
<!-- import link -->
<link rel="stylesheet" href="/css/first.css">
<link rel="stylesheet" href="/css/style.css" media="print" onload="this.media='all';this.onload=null">
<noscript><link rel="stylesheet" href="/css/style.css"></noscript>
<script id="loadcss"></script>
<script>
if (/*@cc_on!@*/false || (!!window.MSInputMethodContext && !!document.documentMode))
document.write(
'<style>'+
'html{'+
'overflow-x: hidden !important;'+
'overflow-y: hidden !important;'+
'}'+
'.kill-ie{'+
'text-align:center;'+
'height: 100%;'+
'margin-top: 15%;'+
'margin-bottom: 5500%;'+
'}'+
'</style>'+
'<div class="kill-ie">'+
'<h1><b>抱歉,您的浏览器无法访问本站</b></h1>'+
'<h3>微软已经于2016年终止了对 Internet Explorer (IE) 10 及更早版本的支持,<br/>'+
'继续使用存在极大的安全隐患,请使用当代主流的浏览器进行访问。</h3><br/>'+
'<a target="_blank" rel="noopener" href="https://www.microsoft.com/zh-cn/WindowsForBusiness/End-of-IE-support"><strong>了解详情 ></strong></a>'+
'</div>');
</script>
<noscript>
<style>
html{
overflow-x: hidden !important;
overflow-y: hidden !important;
}
.kill-noscript{
text-align:center;
height: 100%;
margin-top: 15%;
margin-bottom: 5500%;
}
</style>
<div class="kill-noscript">
<h1><b>抱歉,您的浏览器无法访问本站</b></h1>
<h3>本页面需要浏览器支持(启用)JavaScript</h3><br/>
<a target="_blank" rel="noopener" href="https://www.baidu.com/s?wd=启用JavaScript"><strong>了解详情 ></strong></a>
</div>
</noscript>
<!-- hexo injector head_end start -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.12.0/dist/katex.min.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/hexo-math@4.0.0/dist/style.css">
<!-- hexo injector head_end end --><link href="https://cdn.bootcss.com/KaTeX/0.11.1/katex.min.css" rel="stylesheet" /></head>
<body>
<header id="l_header" class="l_header auto shadow blur show" style='opacity: 0' >
<div class='container'>
<div id='wrapper'>
<div class='nav-sub'>
<p class="title"></p>
<ul class='switcher nav-list-h m-phone' id="pjax-header-nav-list">
<li><a id="s-comment" class="fas fa-comments fa-fw" target="_self" href='javascript:void(0)'></a></li>
<li><a id="s-toc" class="s-toc fas fa-list fa-fw" target="_self" href='javascript:void(0)'></a></li>
</ul>
</div>
<div class="nav-main">
<a class="title flat-box" target="_self" href='/'>
<img no-lazy class='logo' src='https://cdn.jsdelivr.net/gh/volantis-x/cdn-org/blog/Logo-NavBar@3x.png'/>
</a>
<div class='menu navigation'>
<ul class='nav-list-h m-pc'>
<li>
<a class="menuitem flat-box faa-parent animated-hover" href=/
id="home"
>
<i class='fas fa-rss fa-fw'></i>博客
</a>
</li>
<li>
<a class="menuitem flat-box faa-parent animated-hover" href=/friends/
id="friends"
>
<i class='fas fa-link fa-fw'></i>友情链接
</a>
</li>
</ul>
</div>
<div class="m_search">
<form name="searchform" class="form u-search-form">
<i class="icon fas fa-search fa-fw"></i>
<input type="text" class="input u-search-input" placeholder="Search..." />
</form>
</div>
<ul class='switcher nav-list-h m-phone'>
<li><a class="s-search fas fa-search fa-fw" target="_self" href='javascript:void(0)'></a></li>
<li>
<a class="s-menu fas fa-bars fa-fw" target="_self" href='javascript:void(0)'></a>
<ul class="menu-phone list-v navigation white-box">
<li>
<a class="menuitem flat-box faa-parent animated-hover" href=/
id="home"
>
<i class='fas fa-rss fa-fw'></i>博客
</a>
</li>
<li>
<a class="menuitem flat-box faa-parent animated-hover" href=/friends/
id="friends"
>
<i class='fas fa-link fa-fw'></i>友情链接
</a>
</li>
</ul>
</li>
</ul>
</div>
</div>
</div>
</header>
<div id="l_body">
<div id="l_cover">
<div id="full" class='cover-wrapper post dock' style="display: none;">
<div class='cover-bg lazyload placeholder' data-bg="https://cdn.jsdelivr.net/gh/volantis-x/cdn-wallpaper-minimalist/2020/042.jpg"></div>
<div class='cover-body'>
<div class='top'>
<p class="title">lornd's Blog</p>
<p class="subtitle">学算法时一定要想怎么去改进!</p>
</div>
<div class='bottom'>
<div class='menu navigation'>
<div class='list-h'>
</div>
</div>
</div>
</div>
<div id="scroll-down" style="display: none;"><i class="fa fa-chevron-down scroll-down-effects"></i></div>
</div>
</div>
<div id="safearea">
<div class="body-wrapper" id="pjax-container">
<div class='l_main'>
<article class="article post white-box reveal md shadow article-type-post" id="post" itemscope itemprop="blogPost">
<div class="article-meta" id="top">
<h1 class="title">
[论文解读]Mapping natural language commands to web elements
</h1>
<div class='new-meta-box'>
<div class='new-meta-item author'>
<a class='author' href="https://lornd.top" rel="nofollow">
<img no-lazy src="\images\avatar.jpg">
<p>lornd</p>
</a>
</div>
<div class="new-meta-item date">
<a class='notlink'>
<i class="fas fa-calendar-alt fa-fw" aria-hidden="true"></i>
<p>发布于:2023年8月9日</p>
</a>
</div>
<div class="new-meta-item browse leancloud">
<a class='notlink'>
<div id="lc-pv" data-title="[论文解读]Mapping natural language commands to web elements" data-path="/nl2we.html">
<i class="fas fa-eye fa-fw" aria-hidden="true"></i>
<span id='number'><i class="fas fa-circle-notch fa-spin fa-fw" aria-hidden="true"></i></span>
次浏览
</div>
</a>
</div>
</div>
</div>
<p>论文地址:<a target="_blank" rel="noopener" href="https://arxiv.org/abs/1808.09132">Mapping natural language commands to web elements</a> 。</p>
<h2 id="摘要">摘要</h2>
<p>Web 提供了一个丰富的开放域环境,具有文本、结构和空间属性。在这个环境中,我们提出了一个需要将语言与环境联系起来的任务(task for grounding language):给定一个自然语言命令(如“点击第二篇文章”),在 web 页面上选择正确的元素(如一个超链接或者文本框)。我们收集了一个数据集,含有超过 50000 条命令,以捕捉不同的现象如功能参考(找到谁创建了这个网站)、关系推理(John 的文章)和视觉推理(最上面的文章)。我们同时实现并分析了三个捕捉数据集中不同现象的基线模型。</p>
<h2 id="研究背景">研究背景</h2>
<p>Web 界面是包括结构化属性与非结构化属性的复杂文档。由于内容和设计的多样性,Web 界面为自然语言联系环境任务(natural language grounding tasks)提供了一个丰富的环境。</p>
<p>具体地,本文考虑将自然语言命令转换为 Web 页面元素的任务。有些命令直接指向元素的文字,但是很多其他的命令需要对 Web 页面多角度的更加复杂的推理,如文本、属性、样式、DOM 中的结构数据和 Web 页面中的空间数据。</p>
<p>本文的工作是语义解析(将自然语言描述映射为像数据库查询和对象操作之类的动作)工作的扩展。这些动作通常在一个固定且已知的环境中进行,而 Web 页面包含更加多样的结构,这让任务具有了更多的可能性。本文的工作可以视为是一种参考游戏(reference game),给定一个自然语言描述,必须找到指定的对象。这个工作的主要难点在于:</p>
<ol>
<li>Web 页面中的元素有多种属性。</li>
<li>为了解析元素,需要结合上下文。</li>
</ol>
<p>基于自然语言的元素识别在现实世界中有多种应用,如它可以为 Web 页面提供一个声音界面,这对视觉障碍者会很有用。另外一个应用是浏览器自动化:自然语言命令比 CSS 和 XPath 选择器更为灵活,并且可以在不同的浏览器之间泛化。</p>
<h2 id="文章贡献">文章贡献</h2>
<div class="gallery ">
<p><img src="images/nl2we/table1.png" class="lazyload" data-srcset="images/nl2we/table1.png" srcset="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAABGdBTUEAALGPC/xhBQAAADhlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAAqACAAQAAAABAAAAAaADAAQAAAABAAAAAQAAAADa6r/EAAAAC0lEQVQIHWNgAAIAAAUAAY27m/MAAAAASUVORK5CYII=" alt="数据集分布表"></p>
</div>
<p>我们收集了一个含有超过 50000 条自然语言命令的数据集。如图 1 所示,命令涵盖了许多场景,如关系推理、视觉推理以及功能推理。</p>
<p>同时我们还分别基于检索、元素嵌入、文本对齐分别实现了三个模型用于解决该问题。我们的实验分析表明:功能引用、关系引用和视觉推理对从自然语言命令中正确识别元素非常重要。</p>
<h2 id="问题描述">问题描述</h2>
<p>给定一个 Web 页面 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>w</mi></mrow><annotation encoding="application/x-tex">w</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault" style="margin-right:0.02691em;">w</span></span></span></span> ,其中包含元素 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mi>e</mi><mn>1</mn></msub><mo separator="true">,</mo><mo>⋯</mo><mtext> </mtext><mo separator="true">,</mo><msub><mi>e</mi><mi>k</mi></msub></mrow><annotation encoding="application/x-tex">e_1,\cdots, e_k</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.625em;vertical-align:-0.19444em;"></span><span class="mord"><span class="mord mathdefault">e</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.30110799999999993em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight">1</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="minner">⋯</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord"><span class="mord mathdefault">e</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.33610799999999996em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight" style="margin-right:0.03148em;">k</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span> ,再给定一条命令 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>c</mi></mrow><annotation encoding="application/x-tex">c</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">c</span></span></span></span> ,本文的任务是找到由 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>c</mi></mrow><annotation encoding="application/x-tex">c</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">c</span></span></span></span> 描述的元素 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>e</mi><mo>∈</mo><mo stretchy="false">{</mo><msub><mi>e</mi><mn>1</mn></msub><mo separator="true">,</mo><mo>⋯</mo><mtext> </mtext><mo separator="true">,</mo><msub><mi>e</mi><mi>k</mi></msub><mo stretchy="false">}</mo></mrow><annotation encoding="application/x-tex">e\in \{e_1,\cdots,e_k\}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.5782em;vertical-align:-0.0391em;"></span><span class="mord mathdefault">e</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span><span class="mrel">∈</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span></span><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mopen">{</span><span class="mord"><span class="mord mathdefault">e</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.30110799999999993em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight">1</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="minner">⋯</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord"><span class="mord mathdefault">e</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.33610799999999996em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight" style="margin-right:0.03148em;">k</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mclose">}</span></span></span></span> 。本文的训练集和测试集就是由若干个 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mo stretchy="false">(</mo><mi>w</mi><mo separator="true">,</mo><mi>c</mi><mo separator="true">,</mo><mi>e</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">(w, c, e)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mopen">(</span><span class="mord mathdefault" style="margin-right:0.02691em;">w</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord mathdefault">c</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord mathdefault">e</span><span class="mclose">)</span></span></span></span> 三元组组成。</p>
<h3 id="数据集">数据集</h3>
<p>我们在 1835 个页面上收集了 51663 条命令。收集数据的步骤如下:</p>
<ol>
<li>在 Chorme 中渲染并储存了前 10000 个网站的主页。</li>
<li>记录下 DOM 树和每个元素的几何信息,并且储存渲染好的 Web 页面。</li>
<li>筛选出所有被正确渲染,且没有错误内容的英文页面。</li>
<li>让众包工作者进行头脑风暴,对于每个 Web 页面,想出不同的动作,要求每个动作关联且只关联筛选出的交互元素列表中的一个元素。</li>
</ol>
<p>同时,我们鼓励众包工作者不要使用直接指向元素的单词。最后,将收集到的数据以 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mn>7</mn><mo>:</mo><mn>2</mn><mo>:</mo><mn>1</mn></mrow><annotation encoding="application/x-tex">7:2:1</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.64444em;vertical-align:0em;"></span><span class="mord">7</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span><span class="mrel">:</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span></span><span class="base"><span class="strut" style="height:0.64444em;vertical-align:0em;"></span><span class="mord">2</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span><span class="mrel">:</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span></span><span class="base"><span class="strut" style="height:0.64444em;vertical-align:0em;"></span><span class="mord">1</span></span></span></span> 的比例划分为训练集、验证集和测试集。三个数据集中的页面不会重复。</p>
<p>收集到的页面平均有 1051 个元素,收集到的命令平均长度为 4.1 个 token 。</p>
<h3 id="命令中的现象">命令中的现象</h3>
<p>除了直接指向元素的文本,命令可以以多种方式指向元素,分析了训练集中的 200 个数据,得到的结果如表 1 所示。</p>
<p>即使命令直接指向元素,页面中也会有其他的有着命令中单词的元素。平均来说,不包括停止词,命令与 5.9 个叶节点元素有相同的单词。</p>
<h2 id="解决方法">解决方法</h2>
<p>本节主要介绍解决问题的三个模型:基于检索的模型、基于嵌入的模型和基于对齐的模型。</p>
<h3 id="基于检索的模型">基于检索的模型</h3>
<p>很多命令通过它们的文本内容指向元素。因此可以使用检索模型,基于 TF-IDF 分数,以命令作为搜索请求,找到与其最相关的元素。</p>
<p>具体地,通过以下两种计算,将每个元素表示成一个词袋(bag-of-tokens):</p>
<ol>
<li>标记其文本内容,并进行词干处理。</li>
<li>在标点符号和驼峰命名的边界处标记其属性(id, class, placeholder, label, tooltip, aria-text, name, src, href)</li>
</ol>
<p>在计算频率时,通过一个因子 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>α</mi></mrow><annotation encoding="application/x-tex">\alpha</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault" style="margin-right:0.0037em;">α</span></span></span></span> 对属性 token 进行降权处理。在实验中,<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>α</mi></mrow><annotation encoding="application/x-tex">\alpha</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault" style="margin-right:0.0037em;">α</span></span></span></span> 的值被设置为 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mn>3</mn></mrow><annotation encoding="application/x-tex">3</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.64444em;vertical-align:0em;"></span><span class="mord">3</span></span></span></span> 。</p>
<p>文档频率通过训练集中的网页进行计算。如果有多个元素具有相同的分数,则通过启发式算法选择最重要的元素,即先序遍历 DOM 树,选择最早出现的元素。</p>
<h3 id="基于嵌入的模型">基于嵌入的模型</h3>
<p>一个常见的匹配两端文字的方法是分别将它们嵌入成向量,并且计算这两个嵌入向量之间的相关分数。对于命令 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>c</mi></mrow><annotation encoding="application/x-tex">c</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">c</span></span></span></span> 和元素 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mi>e</mi><mn>1</mn></msub><mo separator="true">,</mo><mo>⋯</mo><mtext> </mtext><mo separator="true">,</mo><msub><mi>e</mi><mi>k</mi></msub></mrow><annotation encoding="application/x-tex">e_1, \cdots, e_k</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.625em;vertical-align:-0.19444em;"></span><span class="mord"><span class="mord mathdefault">e</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.30110799999999993em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight">1</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="minner">⋯</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord"><span class="mord mathdefault">e</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.33610799999999996em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight" style="margin-right:0.03148em;">k</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span> ,定义以下条件概率分布:</p>
<p class="katex-block"><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>p</mi><mo stretchy="false">(</mo><msub><mi>e</mi><mi>i</mi></msub><mi mathvariant="normal">∣</mi><mi>c</mi><mo stretchy="false">)</mo><mo>∝</mo><mi>exp</mi><mo></mo><mo stretchy="false">[</mo><mi>s</mi><mo stretchy="false">(</mo><mi>f</mi><mo stretchy="false">(</mo><mi>c</mi><mo stretchy="false">)</mo><mo separator="true">,</mo><mi>g</mi><mo stretchy="false">(</mo><msub><mi>e</mi><mi>i</mi></msub><mo stretchy="false">)</mo><mo stretchy="false">)</mo><mo stretchy="false">]</mo></mrow><annotation encoding="application/x-tex">p(e_i|c) \propto \exp[s(f(c), g(e_i))]
</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault">p</span><span class="mopen">(</span><span class="mord"><span class="mord mathdefault">e</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mord">∣</span><span class="mord mathdefault">c</span><span class="mclose">)</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span><span class="mrel">∝</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span></span><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mop">exp</span><span class="mopen">[</span><span class="mord mathdefault">s</span><span class="mopen">(</span><span class="mord mathdefault" style="margin-right:0.10764em;">f</span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mclose">)</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">g</span><span class="mopen">(</span><span class="mord"><span class="mord mathdefault">e</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mclose">)</span><span class="mclose">)</span><span class="mclose">]</span></span></span></span></span></p>
<p>其中 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>s</mi></mrow><annotation encoding="application/x-tex">s</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">s</span></span></span></span> 是分数函数,<span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>f</mi><mo stretchy="false">(</mo><mi>c</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">f(c)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault" style="margin-right:0.10764em;">f</span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mclose">)</span></span></span></span> 和 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>g</mi><mo stretchy="false">(</mo><msub><mi>e</mi><mi>i</mi></msub><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">g(e_i)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">g</span><span class="mopen">(</span><span class="mord"><span class="mord mathdefault">e</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mclose">)</span></span></span></span> 分别是 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>c</mi></mrow><annotation encoding="application/x-tex">c</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">c</span></span></span></span> 和 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mi>e</mi><mi>i</mi></msub></mrow><annotation encoding="application/x-tex">e_i</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.58056em;vertical-align:-0.15em;"></span><span class="mord"><span class="mord mathdefault">e</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.31166399999999994em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight">i</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span></span></span></span> 的嵌入向量。这个模型的目的是最大化训练集中正确元素概率的对数值。</p>
<h4 id="命令嵌入">命令嵌入</h4>
<p>为了计算 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>f</mi><mo stretchy="false">(</mo><mi>c</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">f(c)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault" style="margin-right:0.10764em;">f</span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mclose">)</span></span></span></span> ,先将 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>c</mi></mrow><annotation encoding="application/x-tex">c</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">c</span></span></span></span> 中的每个 token 嵌入成固定维度的向量,并且对所有的向量取平均值作为最终的嵌入向量。所有 token 的嵌入向量为 GloVe 向量。</p>
<h4 id="元素嵌入">元素嵌入</h4>
<div class="gallery ">
<p><img src="images/nl2we/fig2.png" class="lazyload" data-srcset="images/nl2we/fig2.png" srcset="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAABGdBTUEAALGPC/xhBQAAADhlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAAqACAAQAAAABAAAAAaADAAQAAAABAAAAAQAAAADa6r/EAAAAC0lEQVQIHWNgAAIAAAUAAY27m/MAAAAASUVORK5CYII=" alt="元素属性示例"></p>
</div>
<p>为了计算 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>g</mi><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">g(e)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">g</span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span></span></span></span> ,先对 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>e</mi></mrow><annotation encoding="application/x-tex">e</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">e</span></span></span></span> 的所有属性进行嵌入,将得到的结果拼接起来,并应用一个全连接层,得到一个和 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>f</mi><mo stretchy="false">(</mo><mi>c</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">f(c)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault" style="margin-right:0.10764em;">f</span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mclose">)</span></span></span></span> 维度相同的向量。如图 2 所示,属性包括如下四种:</p>
<ul>
<li><strong>文本内容</strong>:使用命令嵌入函数 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>f</mi></mrow><annotation encoding="application/x-tex">f</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8888799999999999em;vertical-align:-0.19444em;"></span><span class="mord mathdefault" style="margin-right:0.10764em;">f</span></span></span></span> 对 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>e</mi></mrow><annotation encoding="application/x-tex">e</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">e</span></span></span></span> 的文本内容进行嵌入。由于大多数感兴趣元素(链接、按钮、输入框)的文本都较短,因此将文本限制为前 10 个 token 已经足够,并且可以节省内存。</li>
<li><strong>文本属性</strong>:一些属性(aria, title, tooltip, placeholder, label, name)通常包括自然语言,因此我们将这些属性拼接在一起并且使用 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>f</mi></mrow><annotation encoding="application/x-tex">f</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8888799999999999em;vertical-align:-0.19444em;"></span><span class="mord mathdefault" style="margin-right:0.10764em;">f</span></span></span></span> 得到嵌入结果。</li>
<li><strong>字符串属性</strong>:对于其他的字符串属性(tag, id, class),在标点符号处以及驼峰命名的边界处进行标记。用单独的查找表进行嵌入操作并取平均值。</li>
<li><strong>视觉特征</strong>:建立一个包含元素中心坐标(用网页长宽百分比的方式)和可视属性(布尔值)的向量。</li>
</ul>
<h4 id="分数函数">分数函数</h4>
<p>为了计算 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>s</mi><mo stretchy="false">(</mo><mi>f</mi><mo stretchy="false">(</mo><mi>c</mi><mo stretchy="false">)</mo><mo separator="true">,</mo><mi>g</mi><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">s(f(c), g(e) )</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault">s</span><span class="mopen">(</span><span class="mord mathdefault" style="margin-right:0.10764em;">f</span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mclose">)</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">g</span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span><span class="mclose">)</span></span></span></span> ,先将 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>f</mi><mo stretchy="false">(</mo><mi>c</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">f(c)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault" style="margin-right:0.10764em;">f</span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mclose">)</span></span></span></span> 和 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>g</mi><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">g(e)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">g</span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span></span></span></span> 标准化得到 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mover accent="true"><mi>f</mi><mo>^</mo></mover><mo stretchy="false">(</mo><mi>c</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">\hat f(c)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1.2078799999999998em;vertical-align:-0.25em;"></span><span class="mord accent"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.9578799999999998em;"><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="mord mathdefault" style="margin-right:0.10764em;">f</span></span><span style="top:-3.26344em;"><span class="pstrut" style="height:3em;"></span><span class="accent-body" style="left:-0.08332999999999999em;"><span class="mord">^</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.19444em;"><span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mclose">)</span></span></span></span> 和 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mover accent="true"><mi>g</mi><mo>^</mo></mover><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">\hat g(e)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord accent"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.69444em;"><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">g</span></span><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="accent-body" style="left:-0.22222em;"><span class="mord">^</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.19444em;"><span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span></span></span></span> 。然后对向量 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mo stretchy="false">[</mo><mover accent="true"><mi>f</mi><mo>^</mo></mover><mo stretchy="false">(</mo><mi>c</mi><mo stretchy="false">)</mo><mo separator="true">;</mo><mover accent="true"><mi>g</mi><mo>^</mo></mover><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo><mo separator="true">;</mo><mover accent="true"><mi>f</mi><mo>^</mo></mover><mo stretchy="false">(</mo><mi>c</mi><mo stretchy="false">)</mo><mo>⊙</mo><mover accent="true"><mi>g</mi><mo>^</mo></mover><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo><mo stretchy="false">]</mo></mrow><annotation encoding="application/x-tex">[\hat f(c); \hat g(e); \hat f(c) \odot \hat g(e)]</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1.2078799999999998em;vertical-align:-0.25em;"></span><span class="mopen">[</span><span class="mord accent"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.9578799999999998em;"><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="mord mathdefault" style="margin-right:0.10764em;">f</span></span><span style="top:-3.26344em;"><span class="pstrut" style="height:3em;"></span><span class="accent-body" style="left:-0.08332999999999999em;"><span class="mord">^</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.19444em;"><span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mclose">)</span><span class="mpunct">;</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord accent"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.69444em;"><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">g</span></span><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="accent-body" style="left:-0.22222em;"><span class="mord">^</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.19444em;"><span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span><span class="mpunct">;</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord accent"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.9578799999999998em;"><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="mord mathdefault" style="margin-right:0.10764em;">f</span></span><span style="top:-3.26344em;"><span class="pstrut" style="height:3em;"></span><span class="accent-body" style="left:-0.08332999999999999em;"><span class="mord">^</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.19444em;"><span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mclose">)</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span><span class="mbin">⊙</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span></span><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord accent"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.69444em;"><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">g</span></span><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="accent-body" style="left:-0.22222em;"><span class="mord">^</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.19444em;"><span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span><span class="mclose">]</span></span></span></span> 应用一个全连接层得到最后的分数,其中 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mo>⊙</mo></mrow><annotation encoding="application/x-tex">\odot</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.66666em;vertical-align:-0.08333em;"></span><span class="mord">⊙</span></span></span></span> 表示逐元素相乘。</p>
<h4 id="融合空间上下文">融合空间上下文</h4>
<p>在某些情况下,上下文十分重要,如:根据已知的邻居标签文本选择文本框,或者给定作者名字,选择作者名字旁边的文章。基于命令确定需要考虑的关联元素是一个富有挑战性的任务。</p>
<p>我们实验性地将空间上下文模块加入了模型。对于每个方向 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>d</mi><mo>∈</mo><mo stretchy="false">{</mo><mrow><mi mathvariant="normal">t</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">p</mi><mo separator="true">,</mo><mi mathvariant="normal">b</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">t</mi><mi mathvariant="normal">t</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">m</mi><mo separator="true">,</mo><mi mathvariant="normal">l</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">f</mi><mi mathvariant="normal">t</mi><mo separator="true">,</mo><mi mathvariant="normal">r</mi><mi mathvariant="normal">i</mi><mi mathvariant="normal">g</mi><mi mathvariant="normal">h</mi><mi mathvariant="normal">t</mi></mrow><mo stretchy="false">}</mo></mrow><annotation encoding="application/x-tex">d\in \{ {\rm top, bottom, left, right}\}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.73354em;vertical-align:-0.0391em;"></span><span class="mord mathdefault">d</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span><span class="mrel">∈</span><span class="mspace" style="margin-right:0.2777777777777778em;"></span></span><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mopen">{</span><span class="mord"><span class="mord"><span class="mord mathrm">t</span><span class="mord mathrm">o</span><span class="mord mathrm">p</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord mathrm">b</span><span class="mord mathrm">o</span><span class="mord mathrm">t</span><span class="mord mathrm">t</span><span class="mord mathrm">o</span><span class="mord mathrm">m</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord mathrm">l</span><span class="mord mathrm">e</span><span class="mord mathrm" style="margin-right:0.07778em;">f</span><span class="mord mathrm">t</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord mathrm">r</span><span class="mord mathrm">i</span><span class="mord mathrm" style="margin-right:0.01389em;">g</span><span class="mord mathrm">h</span><span class="mord mathrm">t</span></span></span><span class="mclose">}</span></span></span></span>,使用 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>g</mi></mrow><annotation encoding="application/x-tex">g</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.625em;vertical-align:-0.19444em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">g</span></span></span></span> 计算每个方向上最靠近元素 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>e</mi></mrow><annotation encoding="application/x-tex">e</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">e</span></span></span></span> 的邻居元素 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mi>n</mi><mi>d</mi></msub><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">n_d(e)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord"><span class="mord mathdefault">n</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.33610799999999996em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight">d</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span></span></span></span> 的嵌入。如果这个方向上有多个最近元素,随机选取一个,如果没有,就是 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mn>0</mn></mrow><annotation encoding="application/x-tex">0</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.64444em;vertical-align:0em;"></span><span class="mord">0</span></span></span></span> 向量。接下来进行标准化得到 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mover accent="true"><mi>g</mi><mo>^</mo></mover><mo stretchy="false">(</mo><msub><mi>n</mi><mi>d</mi></msub><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">\hat g(n_d(e))</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord accent"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.69444em;"><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">g</span></span><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="accent-body" style="left:-0.22222em;"><span class="mord">^</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.19444em;"><span></span></span></span></span></span><span class="mopen">(</span><span class="mord"><span class="mord mathdefault">n</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.33610799999999996em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight">d</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span><span class="mclose">)</span></span></span></span> ,再将 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mover accent="true"><mi>g</mi><mo>^</mo></mover><mo stretchy="false">(</mo><msub><mi>n</mi><mi>d</mi></msub><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">\hat g(n_d(e))</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord accent"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.69444em;"><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">g</span></span><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="accent-body" style="left:-0.22222em;"><span class="mord">^</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.19444em;"><span></span></span></span></span></span><span class="mopen">(</span><span class="mord"><span class="mord mathdefault">n</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.33610799999999996em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight">d</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span><span class="mclose">)</span></span></span></span> 和 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mover accent="true"><mi>f</mi><mo>^</mo></mover><mo stretchy="false">(</mo><mi>c</mi><mo stretchy="false">)</mo><mo>⊙</mo><mover accent="true"><mi>g</mi><mo>^</mo></mover><mo stretchy="false">(</mo><msub><mi>n</mi><mi>d</mi></msub><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">\hat f(c) \odot \hat g(n_d(e))</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1.2078799999999998em;vertical-align:-0.25em;"></span><span class="mord accent"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.9578799999999998em;"><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="mord mathdefault" style="margin-right:0.10764em;">f</span></span><span style="top:-3.26344em;"><span class="pstrut" style="height:3em;"></span><span class="accent-body" style="left:-0.08332999999999999em;"><span class="mord">^</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.19444em;"><span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mclose">)</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span><span class="mbin">⊙</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span></span><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord accent"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.69444em;"><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="mord mathdefault" style="margin-right:0.03588em;">g</span></span><span style="top:-3em;"><span class="pstrut" style="height:3em;"></span><span class="accent-body" style="left:-0.22222em;"><span class="mord">^</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.19444em;"><span></span></span></span></span></span><span class="mopen">(</span><span class="mord"><span class="mord mathdefault">n</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.33610799999999996em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight">d</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span><span class="mclose">)</span></span></span></span> 拼接起来输入到全连接层。</p>
<h3 id="基于对齐的模型">基于对齐的模型</h3>
<p>基于嵌入的模型的一个缺点在于来自 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>c</mi></mrow><annotation encoding="application/x-tex">c</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">c</span></span></span></span> 和 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>e</mi></mrow><annotation encoding="application/x-tex">e</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">e</span></span></span></span> 的文本 token 不直接交互。为了解决这个问题,本文采用了一种基于单一对齐矩阵的方法。</p>
<p>假设 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>t</mi><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">t(e)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault">t</span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span></span></span></span> 表示 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>e</mi></mrow><annotation encoding="application/x-tex">e</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">e</span></span></span></span> 的文本内容和文本属性的拼接,并只保留 10 个 token。我们构建一个矩阵 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>A</mi><mo stretchy="false">(</mo><mi>c</mi><mo separator="true">,</mo><mi>e</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">A(c, e)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault">A</span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord mathdefault">e</span><span class="mclose">)</span></span></span></span> ,其中 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><msub><mi>A</mi><mrow><mi>i</mi><mi>j</mi></mrow></msub><mo stretchy="false">(</mo><mi>c</mi><mo separator="true">,</mo><mi>e</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">A_{ij}(c, e)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1.036108em;vertical-align:-0.286108em;"></span><span class="mord"><span class="mord mathdefault">A</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.311664em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mathdefault mtight">i</span><span class="mord mathdefault mtight" style="margin-right:0.05724em;">j</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.286108em;"><span></span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord mathdefault">e</span><span class="mclose">)</span></span></span></span> 是 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>c</mi></mrow><annotation encoding="application/x-tex">c</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.43056em;vertical-align:0em;"></span><span class="mord mathdefault">c</span></span></span></span> 第 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>i</mi></mrow><annotation encoding="application/x-tex">i</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.65952em;vertical-align:0em;"></span><span class="mord mathdefault">i</span></span></span></span> 个 token 的嵌入向量与 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>t</mi><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">t(e)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault">t</span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span></span></span></span> 第 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>j</mi></mrow><annotation encoding="application/x-tex">j</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.85396em;vertical-align:-0.19444em;"></span><span class="mord mathdefault" style="margin-right:0.05724em;">j</span></span></span></span> 个 token 的嵌入向量的点积。然后在矩阵上应用两个 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mn>3</mn><mo>×</mo><mn>3</mn></mrow><annotation encoding="application/x-tex">3\times 3</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.72777em;vertical-align:-0.08333em;"></span><span class="mord">3</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span><span class="mbin">×</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span></span><span class="base"><span class="strut" style="height:0.64444em;vertical-align:0em;"></span><span class="mord">3</span></span></span></span> 的卷积层和一个 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mn>2</mn><mo>×</mo><mn>2</mn></mrow><annotation encoding="application/x-tex">2\times 2</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.72777em;vertical-align:-0.08333em;"></span><span class="mord">2</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span><span class="mbin">×</span><span class="mspace" style="margin-right:0.2222222222222222em;"></span></span><span class="base"><span class="strut" style="height:0.64444em;vertical-align:0em;"></span><span class="mord">2</span></span></span></span> 的最大池化层,将结果与 tag 的嵌入连接起来,输入到一个全连接层中,得到一个 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mn>10</mn></mrow><annotation encoding="application/x-tex">10</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.64444em;vertical-align:0em;"></span><span class="mord">1</span><span class="mord">0</span></span></span></span> 维向量 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>h</mi><mo stretchy="false">(</mo><mi>c</mi><mo separator="true">,</mo><mi>e</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">h(c, e)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault">h</span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord mathdefault">e</span><span class="mclose">)</span></span></span></span> 。</p>
<p>最后对 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>h</mi><mo stretchy="false">(</mo><mi>c</mi><mo separator="true">,</mo><mi>e</mi><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">h(c, e)</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault">h</span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord mathdefault">e</span><span class="mclose">)</span></span></span></span> 应用一个单独的全连接层得到一个标量分数,然后使用与基于嵌入的模型相同的目标函数进行训练。为了考虑上下文,简单地将 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>h</mi><mo stretchy="false">(</mo><mi>c</mi><mo separator="true">,</mo><msub><mi>n</mi><mi>d</mi></msub><mo stretchy="false">(</mo><mi>e</mi><mo stretchy="false">)</mo><mo stretchy="false">)</mo></mrow><annotation encoding="application/x-tex">h(c, n_d(e))</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:1em;vertical-align:-0.25em;"></span><span class="mord mathdefault">h</span><span class="mopen">(</span><span class="mord mathdefault">c</span><span class="mpunct">,</span><span class="mspace" style="margin-right:0.16666666666666666em;"></span><span class="mord"><span class="mord mathdefault">n</span><span class="msupsub"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:0.33610799999999996em;"><span style="top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mathdefault mtight">d</span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.15em;"><span></span></span></span></span></span></span><span class="mopen">(</span><span class="mord mathdefault">e</span><span class="mclose">)</span><span class="mclose">)</span></span></span></span> 拼接在一起输入到最后的全连接层。</p>
<h2 id="实验验证">实验验证</h2>
<p>本文实验的指标为准确率,即在所有样本中,模型正确选择了对应元素的样本所占的比例。使用 Adam 优化器训练模型,学习率设置为 <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mn>1</mn><msup><mn>0</mn><mrow><mo>−</mo><mn>3</mn></mrow></msup></mrow><annotation encoding="application/x-tex">10^{-3}</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8141079999999999em;vertical-align:0em;"></span><span class="mord">1</span><span class="mord"><span class="mord">0</span><span class="msupsub"><span class="vlist-t"><span class="vlist-r"><span class="vlist" style="height:0.8141079999999999em;"><span style="top:-3.063em;margin-right:0.05em;"><span class="pstrut" style="height:2.7em;"></span><span class="sizing reset-size6 size3 mtight"><span class="mord mtight"><span class="mord mtight">−</span><span class="mord mtight">3</span></span></span></span></span></span></span></span></span></span></span></span> ,并且基于验证集使用 earlystop 策略。模型可以选择页面上渲染出来的任何元素。</p>
<p>实验结果如表 2 所示,两个神经模型的效果都要明显好于检索模型。</p>
<div class="gallery ">
<p><img src="images/nl2we/table2.png" class="lazyload" data-srcset="images/nl2we/table2.png" srcset="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAABGdBTUEAALGPC/xhBQAAADhlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAAqACAAQAAAABAAAAAaADAAQAAAABAAAAAQAAAADa6r/EAAAAC0lEQVQIHWNgAAIAAAUAAY27m/MAAAAASUVORK5CYII=" alt="实验结果"></p>
</div>
<h3 id="消融分析">消融分析</h3>
<p>为了衡量网页中每种信息的重要性,本文进行了消融实验,依次让模型不能观测到某一种元素:文本内容、属性和空间上下文。</p>
<p>如表 2 结果所示:</p>
<ul>
<li>文本内容是最重要的输入信号。</li>
<li>在嵌入模型和对齐模型中,属性也发挥了很重要的作用。</li>
<li>空间上下文信息提升了对齐模型的效果,但是提升幅度很小。这说明由于上下文类型的多样性和信号的稀疏性,将适当的上下文融入模型是一项具有挑战性的任务。</li>
</ul>
<h3 id="错误分析">错误分析</h3>
<p>为了更好地理解模型如何处理不同的问题,我们在验证集的 100 个嵌入模型和对齐模型出错的样本上进行了分析,结果如表 3 所示:</p>
<div class="gallery ">
<p><img src="images/nl2we/table3.png" class="lazyload" data-srcset="images/nl2we/table3.png" srcset="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAABGdBTUEAALGPC/xhBQAAADhlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAAqACAAQAAAABAAAAAaADAAQAAAABAAAAAQAAAADa6r/EAAAAC0lEQVQIHWNgAAIAAAUAAY27m/MAAAAASUVORK5CYII=" alt="错误分析结果"></p>
</div>
<p>接下来分别详细阐述每种错误。</p>
<h4 id="无法匹配字符串">无法匹配字符串</h4>
<p>很多命令只是简单地指向元素的文本内容(如“click customised garages”指向链接文本“Customised Garages, Canopies & Carports”)。嵌入模型将整个命令嵌入成一个单独的向量,这让它在部分情况下会无法选择只有部分文本匹配的元素;相比之下,对齐模型明确地模拟了文本匹配,因此在这种情况下效果更好。</p>
<h4 id="错误匹配字符串">错误匹配字符串</h4>
<p>由于对文本匹配的依赖,当很多元素与命令有相同的子串时(命令为“shop for knitwear”,并且有很多元素包含单词“shop”),或者当有匹配子串的元素不是正确目标时(“get the program”应当对应“Download”,而不是“Microsoft developer program”),对齐模型会遇到困难。</p>
<h4 id="无法理解表达">无法理解表达</h4>
<p>许多命令利用释义、目标描述和元素特性间接地描述元素。在这种情况下,将元素的不同属性总结为一个嵌入向量的嵌入模型效果就会比对齐模型更好,但是在一些更困难的样本上(“please close this notice for me”指向带有文字“Hide”叉按钮)也会出错。</p>
<h4 id="无法进行推理">无法进行推理</h4>
<p>大多数情况下,模型无法进行关系、顺序和空间推理。最常见的错误是当目标是一个文本框,但是命令使用了文本框附近的标签作为参考。虽然一些文本框提供可供模型使用的语义注释(如提示工具或者界面特性),但是很多网页并不提供此类注释。为了解决这些问题,模型必须基于逻辑上下文或空间上下文辨别文本框的标签。</p>
<h4 id="其他错误">其他错误</h4>
<p>除了注释噪声以外,有时候网页上会有多个元素满足给定的命令(如“log in”可以匹配页面上的任意“Sign In”按钮)。在这种情况下,注释通常会给出所有候选元素中最“突出”的元素。为了给用户提供更加自然的界面,模型应当可以学习预测这些突出的元素,而不是更模糊的元素。</p>
<div class='footer'>
<div class='copyright'>
<blockquote>
<p>博客内容遵循 署名-非商业性使用-相同方式共享 4.0 国际 (CC BY-NC-SA 4.0) 协议</p>
<p>本文永久链接是:<a href=https://lornd.top/nl2we.html>https://lornd.top/nl2we.html</a></p>
</blockquote>
</div>
</div>
<div class='article-meta' id="bottom">
<div class='new-meta-box'>
<div class="new-meta-item date" itemprop="dateUpdated" datetime="2023-09-15T17:05:49+08:00">
<a class='notlink'>
<i class="fas fa-edit fa-fw" aria-hidden="true"></i>
<p>更新于:2023年9月15日</p>
</a>
</div>
<div class="new-meta-item share -mob-share-list">
<div class="-mob-share-list share-body">
<a class="-mob-share-qq" title="" rel="external nofollow noopener noreferrer noopener"
target="_blank" href="http://connect.qq.com/widget/shareqq/index.html?url=https://lornd.top/nl2we.html&title=[论文解读]Mapping natural language commands to web elements - lornd's blog&summary="
>
<img src="https://cdn.jsdelivr.net/gh/volantis-x/cdn-org/logo/128/qq.png" class="lazyload" data-srcset="https://cdn.jsdelivr.net/gh/volantis-x/cdn-org/logo/128/qq.png" srcset="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAABGdBTUEAALGPC/xhBQAAADhlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAAqACAAQAAAABAAAAAaADAAQAAAABAAAAAQAAAADa6r/EAAAAC0lEQVQIHWNgAAIAAAUAAY27m/MAAAAASUVORK5CYII=">
</a>
<a class="-mob-share-qzone" title="" rel="external nofollow noopener noreferrer noopener"
target="_blank" href="https://sns.qzone.qq.com/cgi-bin/qzshare/cgi_qzshare_onekey?url=https://lornd.top/nl2we.html&title=[论文解读]Mapping natural language commands to web elements - lornd's blog&summary="
>
<img src="https://cdn.jsdelivr.net/gh/volantis-x/cdn-org/logo/128/qzone.png" class="lazyload" data-srcset="https://cdn.jsdelivr.net/gh/volantis-x/cdn-org/logo/128/qzone.png" srcset="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAABGdBTUEAALGPC/xhBQAAADhlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAAqACAAQAAAABAAAAAaADAAQAAAABAAAAAQAAAADa6r/EAAAAC0lEQVQIHWNgAAIAAAUAAY27m/MAAAAASUVORK5CYII=">
</a>
<a class="-mob-share-weibo" title="" rel="external nofollow noopener noreferrer noopener"
target="_blank" href="http://service.weibo.com/share/share.php?url=https://lornd.top/nl2we.html&title=[论文解读]Mapping natural language commands to web elements - lornd's blog&summary="
>
<img src="https://cdn.jsdelivr.net/gh/volantis-x/cdn-org/logo/128/weibo.png" class="lazyload" data-srcset="https://cdn.jsdelivr.net/gh/volantis-x/cdn-org/logo/128/weibo.png" srcset="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAABGdBTUEAALGPC/xhBQAAADhlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAAqACAAQAAAABAAAAAaADAAQAAAABAAAAAQAAAADa6r/EAAAAC0lEQVQIHWNgAAIAAAUAAY27m/MAAAAASUVORK5CYII=">
</a>
</div>
</div>
</div>
</div>
<div class="prev-next">
<a class='prev' href='/qweb.html'>
<p class='title'><i class="fas fa-chevron-left" aria-hidden="true"></i>[论文解读]learning to navigate the web</p>
<p class='content'>论文地址:learning to navigate the web 。
摘要
在有着巨大的状态空间和动作空间,以及稀疏奖励的环境中进行学习,会阻碍强化学习智能体通过试错的学习过程。例如,在网页上...</p>
</a>
<a class='next' href='/NL2API.html'>
<p class='title'>[论文解读]Building Natural Language Interfaces to Web APIs<i class="fas fa-chevron-right" aria-hidden="true"></i></p>
<p class='content'>论文地址:Building Natural Language Interfaces to Web APIs 。
摘要
随着 Web 面向服务架构的发展,应用程序接口(application pr...</p>
</a>
</div>
</article>
<article class="post white-box reveal shadow" id="comments">
<p ct><i class='fas fa-comments'></i> 评论</p>
<div id="valine_container" class="valine_thread">
<i class="fas fa-cog fa-spin fa-fw fa-2x"></i>
</div>
</article>
<script>
// https://github.com/theme-next/hexo-theme-next/blob/master/layout/_third-party/math/mathjax.swig
if (typeof MathJax === 'undefined') {
window.MathJax = {
loader: {
source: {
'[tex]/amsCd': '[tex]/amscd',
'[tex]/AMScd': '[tex]/amscd'
}
},
tex: {
inlineMath: {'[+]': [['$', '$']]},
tags: 'ams'
},
options: {
renderActions: {
findScript: [10, doc => {
document.querySelectorAll('script[type^="math/tex"]').forEach(node => {
const display = !!node.type.match(/; *mode=display/);
const math = new doc.options.MathItem(node.textContent, doc.inputJax[0], display);
const text = document.createTextNode('');
node.parentNode.replaceChild(text, node);
math.start = {node: text, delim: '', n: 0};
math.end = {node: text, delim: '', n: 0};
doc.math.push(math);
});
}, '', false],
insertedScript: [200, () => {
document.querySelectorAll('mjx-container').forEach(node => {
let target = node.parentNode;
if (target.nodeName.toLowerCase() === 'li') {
target.parentNode.classList.add('has-jax');
}
});
}, '', false]
}
}
};
(function () {
var script = document.createElement('script');
script.src = 'https://cdn.jsdelivr.net/npm/mathjax@3.0/es5/tex-mml-chtml.js';
script.defer = true;
document.head.appendChild(script);
})();
} else {
// 文章章节标题不能为 “MathJax” ,否则会报错。
MathJax.startup.document.state(0);
MathJax.texReset();
MathJax.typeset();
}
</script>
</div>
<aside class='l_side'>
<section class="widget toc-wrapper shadow desktop mobile" id="toc-div" >
<header>
<i class="fas fa-list fa-fw" aria-hidden="true"></i><span class='name'>本文目录</span>
</header>
<div class='content'>
<ol class="toc"><li class="toc-item toc-level-2"><a class="toc-link" href="#%E6%91%98%E8%A6%81"><span class="toc-text">摘要</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E7%A0%94%E7%A9%B6%E8%83%8C%E6%99%AF"><span class="toc-text">研究背景</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E6%96%87%E7%AB%A0%E8%B4%A1%E7%8C%AE"><span class="toc-text">文章贡献</span></a></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E9%97%AE%E9%A2%98%E6%8F%8F%E8%BF%B0"><span class="toc-text">问题描述</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#%E6%95%B0%E6%8D%AE%E9%9B%86"><span class="toc-text">数据集</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%91%BD%E4%BB%A4%E4%B8%AD%E7%9A%84%E7%8E%B0%E8%B1%A1"><span class="toc-text">命令中的现象</span></a></li></ol></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E8%A7%A3%E5%86%B3%E6%96%B9%E6%B3%95"><span class="toc-text">解决方法</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%9F%BA%E4%BA%8E%E6%A3%80%E7%B4%A2%E7%9A%84%E6%A8%A1%E5%9E%8B"><span class="toc-text">基于检索的模型</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%9F%BA%E4%BA%8E%E5%B5%8C%E5%85%A5%E7%9A%84%E6%A8%A1%E5%9E%8B"><span class="toc-text">基于嵌入的模型</span></a><ol class="toc-child"><li class="toc-item toc-level-4"><a class="toc-link" href="#%E5%91%BD%E4%BB%A4%E5%B5%8C%E5%85%A5"><span class="toc-text">命令嵌入</span></a></li><li class="toc-item toc-level-4"><a class="toc-link" href="#%E5%85%83%E7%B4%A0%E5%B5%8C%E5%85%A5"><span class="toc-text">元素嵌入</span></a></li><li class="toc-item toc-level-4"><a class="toc-link" href="#%E5%88%86%E6%95%B0%E5%87%BD%E6%95%B0"><span class="toc-text">分数函数</span></a></li><li class="toc-item toc-level-4"><a class="toc-link" href="#%E8%9E%8D%E5%90%88%E7%A9%BA%E9%97%B4%E4%B8%8A%E4%B8%8B%E6%96%87"><span class="toc-text">融合空间上下文</span></a></li></ol></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E5%9F%BA%E4%BA%8E%E5%AF%B9%E9%BD%90%E7%9A%84%E6%A8%A1%E5%9E%8B"><span class="toc-text">基于对齐的模型</span></a></li></ol></li><li class="toc-item toc-level-2"><a class="toc-link" href="#%E5%AE%9E%E9%AA%8C%E9%AA%8C%E8%AF%81"><span class="toc-text">实验验证</span></a><ol class="toc-child"><li class="toc-item toc-level-3"><a class="toc-link" href="#%E6%B6%88%E8%9E%8D%E5%88%86%E6%9E%90"><span class="toc-text">消融分析</span></a></li><li class="toc-item toc-level-3"><a class="toc-link" href="#%E9%94%99%E8%AF%AF%E5%88%86%E6%9E%90"><span class="toc-text">错误分析</span></a><ol class="toc-child"><li class="toc-item toc-level-4"><a class="toc-link" href="#%E6%97%A0%E6%B3%95%E5%8C%B9%E9%85%8D%E5%AD%97%E7%AC%A6%E4%B8%B2"><span class="toc-text">无法匹配字符串</span></a></li><li class="toc-item toc-level-4"><a class="toc-link" href="#%E9%94%99%E8%AF%AF%E5%8C%B9%E9%85%8D%E5%AD%97%E7%AC%A6%E4%B8%B2"><span class="toc-text">错误匹配字符串</span></a></li><li class="toc-item toc-level-4"><a class="toc-link" href="#%E6%97%A0%E6%B3%95%E7%90%86%E8%A7%A3%E8%A1%A8%E8%BE%BE"><span class="toc-text">无法理解表达</span></a></li><li class="toc-item toc-level-4"><a class="toc-link" href="#%E6%97%A0%E6%B3%95%E8%BF%9B%E8%A1%8C%E6%8E%A8%E7%90%86"><span class="toc-text">无法进行推理</span></a></li><li class="toc-item toc-level-4"><a class="toc-link" href="#%E5%85%B6%E4%BB%96%E9%94%99%E8%AF%AF"><span class="toc-text">其他错误</span></a></li></ol></li></ol></li></ol>
</div>
</section>
</aside>
<!--此文件用来存放一些不方便取值的变量-->
<!--思路大概是将值藏到重加载的区域内-->
<script>
window.pdata={}
pdata.ispage=true;
pdata.postTitle="[论文解读]Mapping natural language commands to web elements";
pdata.commentPath="";
pdata.commentPlaceholder="";
// header 这里无论是否开启pjax都需要
var l_header=document.getElementById("l_header");
l_header.classList.add("show");
// cover
var cover_wrapper=document.querySelector('.cover-wrapper');
cover_wrapper.id="none";
cover_wrapper.style.display="none";
</script>
</div>
<footer class="footer clearfix">
<br><br>
<div class="aplayer-container">
</div>
<br>
<div class="social-wrapper">
</div>
<div><p>博客内容遵循 <a target="_blank" rel="noopener" href="https://creativecommons.org/licenses/by-nc-sa/4.0/deed.zh">署名-非商业性使用-相同方式共享 4.0 国际 (CC BY-NC-SA 4.0) 协议</a></p>
</div>
<div><p><span id="lc-sv">本站总访问量为 <span id='number'><i class="fas fa-circle-notch fa-spin fa-fw" aria-hidden="true"></i></span> 次</span> <span id="lc-uv">访客数为 <span id='number'><i class="fas fa-circle-notch fa-spin fa-fw" aria-hidden="true"></i></span> 人</span></p>
</div>
本站使用
<a href="https://github.com/volantis-x/hexo-theme-volantis/tree/4.3.1" target="_blank" class="codename">Volantis</a>
作为主题
<div class='copyright'>
<p><a href="/">Copyright © 2017-2020 XXX</a></p>
</div>
</footer>
<a id="s-top" class="fas fa-arrow-up fa-fw" href="javascript:void(0)"></a>
</div>
</div>
<div>
<script>
/************这个文件存放不需要重载的全局变量和全局函数*********/
window.volantis={};
window.volantis.loadcss=document.getElementById("loadcss");
/******************** Pjax ********************************/
function VPjax(){
this.list=[] // 存放回调函数
this.start=()=>{
for(var i=0;i<this.list.length;i++){
this.list[i].run();
}
}
this.push=(fn,name)=>{
var f=new PjaxItem(fn,name);
this.list.push(f);
}
// 构造一个可以run的对象
function PjaxItem(fn,name){
// 函数名称
this.name = name || fn.name
// run方法
this.run=()=>{
fn()
}
}
}
volantis.pjax={}
volantis.pjax.method={
complete: new VPjax(),
error: new VPjax(),
send: new VPjax()
}
volantis.pjax={
...volantis.pjax,
push: volantis.pjax.method.complete.push,
error: volantis.pjax.method.error.push,
send: volantis.pjax.method.send.push
}
/********************脚本懒加载函数********************************/
// 已经加入了setTimeout
function loadScript(src, cb) {
setTimeout(function() {
var HEAD = document.getElementsByTagName('head')[0] || document.documentElement;
var script = document.createElement('script');
script.setAttribute('type','text/javascript');
if (cb) script.onload = cb;
script.setAttribute('src', src);
HEAD.appendChild(script);
});
}
//https://github.com/filamentgroup/loadCSS
var loadCSS = function( href, before, media, attributes ){
var doc = window.document;
var ss = doc.createElement( "link" );
var ref;
if( before ){
ref = before;
}
else {
var refs = ( doc.body || doc.getElementsByTagName( "head" )[ 0 ] ).childNodes;
ref = refs[ refs.length - 1];
}
var sheets = doc.styleSheets;
if( attributes ){
for( var attributeName in attributes ){
if( attributes.hasOwnProperty( attributeName ) ){
ss.setAttribute( attributeName, attributes[attributeName] );
}
}
}
ss.rel = "stylesheet";
ss.href = href;
ss.media = "only x";
function ready( cb ){
if( doc.body ){
return cb();
}
setTimeout(function(){
ready( cb );
});
}
ready( function(){
ref.parentNode.insertBefore( ss, ( before ? ref : ref.nextSibling ) );
});
var onloadcssdefined = function( cb ){
var resolvedHref = ss.href;
var i = sheets.length;
while( i-- ){
if( sheets[ i ].href === resolvedHref ){
return cb();
}
}
setTimeout(function() {
onloadcssdefined( cb );
});
};
function loadCB(){
if( ss.addEventListener ){
ss.removeEventListener( "load", loadCB );
}
ss.media = media || "all";
}
if( ss.addEventListener ){
ss.addEventListener( "load", loadCB);
}
ss.onloadcssdefined = onloadcssdefined;
onloadcssdefined( loadCB );
return ss;
};
</script>
<script>
loadCSS("https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@5.14/css/all.min.css", window.volantis.loadcss);
</script>
<!-- required -->
<script src="https://cdn.jsdelivr.net/npm/jquery@3.5/dist/jquery.min.js"></script>
<script>
function pjax_fancybox() {
$(".md .gallery").find("img").each(function () { //渲染 fancybox
var element = document.createElement("a"); // a 标签
$(element).attr("class", "fancybox");
$(element).attr("pjax-fancybox", ""); // 过滤 pjax
$(element).attr("href", $(this).attr("src"));
if ($(this).attr("data-original")) {
$(element).attr("href", $(this).attr("data-original"));
}
$(element).attr("data-fancybox", "images");
var caption = ""; // 描述信息
if ($(this).attr('alt')) { // 判断当前页面是否存在描述信息
$(element).attr('data-caption', $(this).attr('alt'));
caption = $(this).attr('alt');
}
var div = document.createElement("div");
$(div).addClass("fancybox");
$(this).wrap(div); // 最外层套 div ,其实主要作用还是 class 样式
var span = document.createElement("span");
$(span).addClass("image-caption");
$(span).text(caption); // 加描述
$(this).after(span); // 再套一层描述
$(this).wrap(element); // 最后套 a 标签
})
$(".md .gallery").find("img").fancybox({
selector: '[data-fancybox="images"]',
hash: false,
loop: false,
closeClick: true,
helpers: {
overlay: {closeClick: true}
},
buttons: [
"zoom",
"close"
]
});
};
function SCload_fancybox() {
if ($(".md .gallery").find("img").length == 0) return;
loadCSS("https://cdn.jsdelivr.net/npm/@fancyapps/fancybox@3.5.7/dist/jquery.fancybox.min.css", document.getElementById("loadcss"));
loadScript('https://cdn.jsdelivr.net/gh/fancyapps/fancybox@3.5.7/dist/jquery.fancybox.min.js', pjax_fancybox)
};
$(function () {
SCload_fancybox();
});
function Pjax_SCload_fancybox(){
if (typeof $.fancybox == "undefined") {
SCload_fancybox();
} else {
pjax_fancybox();
}
}
volantis.pjax.push(Pjax_SCload_fancybox)
volantis.pjax.send(()=>{
if (typeof $.fancybox != "undefined") {
$.fancybox.close(); // 关闭弹窗
}
},'fancybox')
</script>
<!-- internal -->
<script>
function loadIssuesJS() {
if ($(".md").find(".issues-api").length == 0) return;
loadScript('/js/issues.js');
};
$(function () {
loadIssuesJS();
});
volantis.pjax.push(()=>{
if (typeof IssuesAPI == "undefined") {
loadIssuesJS();
}
},"IssuesJS")
</script>
<script defer src="https://cdn.jsdelivr.net/npm/vanilla-lazyload@17.1.0/dist/lazyload.min.js"></script>
<script>
// https://www.npmjs.com/package/vanilla-lazyload
// Set the options globally
// to make LazyLoad self-initialize
window.lazyLoadOptions = {
elements_selector: ".lazyload",
threshold: 0
};
// Listen to the initialization event
// and get the instance of LazyLoad
window.addEventListener(
"LazyLoad::Initialized",
function (event) {
window.lazyLoadInstance = event.detail.instance;
},
false
);
document.addEventListener('DOMContentLoaded', function () {
lazyLoadInstance.update();
});
document.addEventListener('pjax:complete', function () {
lazyLoadInstance.update();
});
</script>
<script>
window.FPConfig = {
delay: 0,
ignoreKeywords: [],
maxRPS: 5,
hoverDelay: 25
};
</script>
<script defer src="https://cdn.jsdelivr.net/gh/gijo-varghese/flying-pages@2.1.2/flying-pages.min.js"></script>