(
= Paper PDF,
= Presentation slides,
= Presentation video)
Balreet Grewal; James Graham; Jeff Muizelaar; Jan Odvarko; Suhaib Mujahid; Marco Castelluccio; Cor-Paul Bezemer
XBIDetective: Leveraging Vision Language Models for Identifying Cross-Browser Visual Inconsistencies Inproceedings
International Conference on Software Engineering - Software Engineering in Practice (ICSE - SEIP) Track, 2026.
Abstract | BibTeX | Tags: Regression testing, Testing, Web applications
@comment{ICSE-SEIP 2026 paper on XBIDetective. The abstract is stored as one continuous paragraph with LaTeX-escaped percent signs; author names use the "Last, First" form; braces protect acronyms from style recasing.}
@inproceedings{balreet_xbidetective,
  title     = {{XBIDetective}: Leveraging Vision Language Models for Identifying Cross-Browser Visual Inconsistencies},
  author    = {Grewal, Balreet and Graham, James and Muizelaar, Jeff and Odvarko, Jan and Mujahid, Suhaib and Castelluccio, Marco and Bezemer, Cor-Paul},
  year      = {2026},
  date      = {2026-04-01},
  urldate   = {2026-04-01},
  booktitle = {International Conference on Software Engineering - Software Engineering in Practice ({ICSE} - {SEIP}) Track},
  abstract  = {Browser rendering bugs can be challenging to detect for browser developers, as they may be triggered by very specific conditions that are exhibited on only a very small subset of websites. Cross-browser inconsistencies (XBIs), variations in how a website is interpreted and displayed on different browsers, can be helpful guides to detect such rendering bugs. Although visual and Document Object Model (DOM)-based analysis techniques exist for detecting XBIs, they often struggle with dynamic and interactive elements. In this study, we discuss our industry experience with using vision language models (VLMs) to identify XBIs. We present the XBIDetective tool which automatically captures screenshots of a website in Mozilla Firefox and Google Chrome, and analyzes them with a VLM for XBIs. We evaluate XBIDetective's performance with an off-the-shelf and a fine-tuned VLM on 1,052 websites. We show that XBIDetective can identify cross-browser discrepancies with 79\% accuracy and detect dynamic elements and advertisements with 84\% and 85\% accuracy, respectively, when using the fine-tuned VLM. We discuss important lessons learned, and we present several potential practical use cases for XBIDetective, including automated regression testing, large-scale monitoring of websites, and rapid triaging of XBI bug reports.},
  keywords  = {Regression testing, Testing, Web applications},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Browser rendering bugs can be challenging to detect for browser developers, as they may be triggered by very specific conditions that
are exhibited on only a very small subset of websites. Cross-browser
inconsistencies (XBIs), variations in how a website is interpreted
and displayed on different browsers, can be helpful guides to detect
such rendering bugs. Although visual and Document Object Model
(DOM)-based analysis techniques exist for detecting XBIs, they
often struggle with dynamic and interactive elements. In this study,
we discuss our industry experience with using vision language
models (VLMs) to identify XBIs. We present the XBIDetective tool
which automatically captures screenshots of a website in Mozilla
Firefox and Google Chrome, and analyzes them with a VLM for XBIs.
We evaluate XBIDetective’s performance with an off-the-shelf and
a fine-tuned VLM on 1,052 websites. We show that XBIDetective
can identify cross-browser discrepancies with 79% accuracy and
detect dynamic elements and advertisements with 84% and 85%
accuracy, respectively, when using the fine-tuned VLM. We discuss
important lessons learned, and we present several potential practical
use cases for XBIDetective, including automated regression
testing, large-scale monitoring of websites, and rapid triaging of
XBI bug reports.
Finlay Macklon; Markos Viggiato; Natalia Romanova; Chris Buzon; Dale Paas; Cor-Paul Bezemer
A Taxonomy of Testable HTML5 Canvas Issues Journal Article
IEEE Transactions on Software Engineering (TSE), 49 (6), pp. 3647–3659, 2023.
Abstract | BibTeX | Tags: Testing, Web applications
@comment{Journal article in IEEE Transactions on Software Engineering, volume 49, issue 6. Percent signs in the abstract are LaTeX-escaped; author names use the "Last, First" form; braces protect acronyms from style recasing.}
@article{MacklonTSE2023,
  title     = {A Taxonomy of Testable {HTML5} Canvas Issues},
  author    = {Macklon, Finlay and Viggiato, Markos and Romanova, Natalia and Buzon, Chris and Paas, Dale and Bezemer, Cor-Paul},
  year      = {2023},
  date      = {2023-06-01},
  urldate   = {2023-06-01},
  journal   = {{IEEE} Transactions on Software Engineering},
  volume    = {49},
  number    = {6},
  pages     = {3647--3659},
  abstract  = {The HTML5 canvas is widely used to display high quality graphics in web applications. However, the combination of web, GUI, and visual techniques that are required to build canvas applications, together with the lack of testing and debugging tools, makes developing such applications very challenging. To help direct future research on testing canvas applications, in this paper we present a taxonomy of testable canvas issues. First, we extracted 2,403 canvas related issue reports from 123 open source GitHub projects that use the HTML5 canvas. Second, we constructed our taxonomy by manually classifying a random sample of 332 issue reports. Our manual classification identified five broad categories of testable canvas issues, such as Visual and Performance issues. We found that Visual issues are the most frequent (35\%), while Performance issues are relatively infrequent (5\%). We also found that many testable canvas issues that present themselves visually on the canvas are actually caused by other components of the web application. Our taxonomy of testable canvas issues can be used to steer future research into canvas issues and testing.},
  keywords  = {Testing, Web applications},
  pubstate  = {published},
  tppubtype = {article}
}
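The HTML5 canvas is widely used to display high quality graphics in web applications. However, the combination of
web, GUI, and visual techniques that are required to build canvas applications, together with the lack of testing and debugging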
tools, makes developing such applications very challenging. To help direct future research on testing canvas applications, in this
paper we present a taxonomy of testable canvas issues. First, we extracted 2,403 canvas related issue reports from 123 open
source GitHub projects that use the HTML5 canvas. Second, we constructed our taxonomy by manually classifying a random
sample of 332 issue reports. Our manual classification identified five broad categories of testable canvas issues, such as Visual
and Performance issues. We found that Visual issues are the most frequent (35%), while Performance issues are relatively infrequent
(5%). We also found that many testable canvas issues that present themselves visually on the canvas are actually caused by
other components of the web application. Our taxonomy of testable canvas issues can be used to steer future research into
canvas issues and testing.
Finlay Macklon; Mohammad Reza Taesiri; Markos Viggiato; Stefan Antoszko; Natalia Romanova; Dale Paas; Cor-Paul Bezemer
Automatically Detecting Visual Bugs in HTML5 <canvas> Games Inproceedings
37th IEEE/ACM International Conference on Automated Software Engineering (ASE), 2022.
BibTeX | Tags: Computer games, Game development, Gaming, Regression testing, Testing, Web applications
@inproceedings{finlay_ase2022,
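title = {Automatically Detecting Visual Bugs in {HTML5} <canvas> Games},
author = {Macklon, Finlay and Taesiri, Mohammad Reza and Viggiato, Markos and Antoszko, Stefan and Romanova, Natalia and Paas, Dale and Bezemer, Cor-Paul},
year = {2022},
booktitle = {37th IEEE/ACM International Conference on Automated Software Engineering (ASE)},
keywords = {Computer games, Game development, Gaming, Regression testing, Testing, Web applications},
tppubtype = {inproceedings}
}
Cor-Paul Bezemer; Ali Mesbah; Arie van Deursen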
Automated Security Testing of Web Widget Interactions Inproceedings
European Software Engineering Conference/ACM SIGSOFT International Symposium on the Foundations of Software Engineering (ESEC/FSE), pp. 81–90, 2009.
Abstract | BibTeX | Tags: Security testing, Web applications
@comment{ESEC/FSE 2009 paper on security testing of web widget interactions. The page range uses the BibTeX double-hyphen form; author names use the "Last, First" form so the "van" particle is parsed explicitly; braces protect acronyms from style recasing.}
@inproceedings{cp_fse,
  title     = {Automated Security Testing of Web Widget Interactions},
  author    = {Bezemer, Cor-Paul and Mesbah, Ali and van Deursen, Arie},
  year      = {2009},
  date      = {2009-08-24},
  urldate   = {2009-08-24},
  booktitle = {European Software Engineering Conference/{ACM} {SIGSOFT} International Symposium on the Foundations of Software Engineering ({ESEC/FSE})},
  pages     = {81--90},
  abstract  = {We present a technique for automatically detecting security vulnerabilities in client-side self-contained components, called web widgets, that can co-exist independently on a single web page. In this paper we focus on two security scenarios, namely the case in which (1) a malicious widget changes the content (DOM) of another widget, and (2) a widget steals data from another widget and sends it to the server via an HTTP request. We propose a dynamic analysis approach for automatically executing the web application and analyzing the runtime changes in the user interface, as well as the outgoing HTTP calls, to detect inter-widget interaction violations.
Our approach, implemented in a number of open source Atusa plugins, called Diva, requires no modification of application code, and has few false positives. We discuss the results of an empirical evaluation of the violation revealing capabilities, performance, and scalability of our approach, by means of two case studies, on the Exact Widget Framework and Pageflakes, a commercial, widely used widget framework.},
  keywords  = {Security testing, Web applications},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Our approach, implemented in a number of open source Atusa plugins, called Diva, requires no modification of application code, and has few false positives. We discuss the results of an empirical evaluation of the violation revealing capabilities, performance, and scalability of our approach, by means of two case studies, on the Exact Widget Framework and Pageflakes, a commercial, widely used widget framework.
