(Icon legend: document icon = Paper PDF; slides icon = Presentation slides; video icon = Presentation video)
Balreet Grewal; James Graham; Jeff Muizelaar; Jan Odvarko; Suhaib Mujahid; Marco Castelluccio; Cor-Paul Bezemer
XBIDetective: Leveraging Vision Language Models for Identifying Cross-Browser Visual Inconsistencies Inproceedings
International Conference on Software Engineering - Software Engineering in Practice (ICSE - SEIP) Track, 2026.
Abstract | BibTeX | Tags: Regression testing, Testing, Web applications
@inproceedings{balreet_xbidetective,
  title     = {{XBIDetective}: Leveraging Vision Language Models for Identifying Cross-Browser Visual Inconsistencies},
  author    = {Balreet Grewal and James Graham and Jeff Muizelaar and Jan Odvarko and Suhaib Mujahid and Marco Castelluccio and Cor-Paul Bezemer},
  year      = {2026},
  date      = {2026-04-01},
  urldate   = {2026-04-01},
  booktitle = {International Conference on Software Engineering - Software Engineering in Practice (ICSE - SEIP) Track},
  abstract  = {Browser rendering bugs can be challenging to detect for browser developers, as they may be triggered by very specific conditions that are exhibited on only a very small subset of websites. Cross-browser inconsistencies (XBIs), variations in how a website is interpreted and displayed on different browsers, can be helpful guides to detect such rendering bugs. Although visual and Document Object Model (DOM)-based analysis techniques exist for detecting XBIs, they often struggle with dynamic and interactive elements. In this study, we discuss our industry experience with using vision language models (VLMs) to identify XBIs. We present the XBIDetective tool which automatically captures screenshots of a website in Mozilla Firefox and Google Chrome, and analyzes them with a VLM for XBIs. We evaluate XBIDetective's performance with an off-the-shelf and a fine-tuned VLM on 1,052 websites. We show that XBIDetective can identify cross-browser discrepancies with 79% accuracy and detect dynamic elements and advertisements with 84% and 85% accuracy, respectively, when using the fine-tuned VLM. We discuss important lessons learned, and we present several potential practical use cases for XBIDetective, including automated regression testing, large-scale monitoring of websites, and rapid triaging of XBI bug reports.},
  keywords  = {Regression testing, Testing, Web applications},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Browser rendering bugs can be challenging to detect for browser developers, as they may be triggered by very specific conditions that
are exhibited on only a very small subset of websites. Cross-browser
inconsistencies (XBIs), variations in how a website is interpreted
and displayed on different browsers, can be helpful guides to detect
such rendering bugs. Although visual and Document Object Model
(DOM)-based analysis techniques exist for detecting XBIs, they
often struggle with dynamic and interactive elements. In this study,
we discuss our industry experience with using vision language
models (VLMs) to identify XBIs. We present the XBIDetective tool
which automatically captures screenshots of a website in Mozilla
Firefox and Google Chrome, and analyzes them with a VLM for XBIs.
We evaluate XBIDetective’s performance with an off-the-shelf and
a fine-tuned VLM on 1,052 websites. We show that XBIDetective
can identify cross-browser discrepancies with 79% accuracy and
detect dynamic elements and advertisements with 84% and 85%
accuracy, respectively, when using the fine-tuned VLM. We discuss
important lessons learned, and we present several potential practical use cases for XBIDetective, including automated regression
testing, large-scale monitoring of websites, and rapid triaging of
XBI bug reports.
Finlay Macklon; Mohammad Reza Taesiri; Markos Viggiato; Stefan Antoszko; Natalia Romanova; Dale Paas; Cor-Paul Bezemer
Automatically Detecting Visual Bugs in HTML5 <canvas> Games Inproceedings
37th IEEE/ACM International Conference on Automated Software Engineering (ASE), 2022.
BibTeX | Tags: Computer games, Game development, Gaming, Regression testing, Testing, Web applications
@inproceedings{finlay_ase2022,
  title     = {Automatically Detecting Visual Bugs in {HTML5} {\textless}canvas{\textgreater} Games},
  author    = {Finlay Macklon and Mohammad Reza Taesiri and Markos Viggiato and Stefan Antoszko and Natalia Romanova and Dale Paas and Cor-Paul Bezemer},
  year      = {2022},
  booktitle = {37th IEEE/ACM International Conference on Automated Software Engineering (ASE)},
  keywords  = {Computer games, Game development, Gaming, Regression testing, Testing, Web applications},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Simon Eismann; Cor-Paul Bezemer; Weiyi Shang; Dušan Okanović; André van Hoorn
Microservices: A Performance Tester's Dream or Nightmare? Inproceedings
ACM/SPEC International Conference on Performance Engineering (ICPE), pp. 1–12, 2020.
Abstract | BibTeX | Tags: DevOps, Microservices, Performance, Regression testing
@inproceedings{Simon20,
  title     = {Microservices: A Performance Tester's Dream or Nightmare?},
  author    = {Simon Eismann and Cor-Paul Bezemer and Weiyi Shang and Dušan Okanović and André van Hoorn},
  year      = {2020},
  date      = {2020-01-24},
  urldate   = {2020-01-24},
  booktitle = {ACM/SPEC International Conference on Performance Engineering (ICPE)},
  pages     = {1--12},
  abstract  = {In recent years, there has been a shift in software development towards microservice-based architectures, which consist of small services that focus on one particular functionality. Many companies are migrating their applications to such architectures to reap the benefits of microservices, such as increased flexibility, scalability and a smaller granularity of the offered functionality by a service.
On the one hand, the benefits of microservices for functional testing are often praised, as the focus on one functionality and their smaller granularity allow for more targeted and more convenient testing. On the other hand, using microservices has their consequences (both positive and negative) on other types of testing, such as performance testing. Performance testing is traditionally done by establishing the baseline performance of a software version, which is then used to compare the performance testing results of later software versions. However, as we show in this paper, establishing such a baseline performance is challenging in microservice applications.
In this paper, we discuss the benefits and challenges of microservices from a performance tester's point of view. Through a series of experiments on the TeaStore application, we demonstrate how microservices affect the performance testing process, and we demonstrate that it is not straightforward to achieve reliable performance testing results for a microservice application.},
  keywords  = {DevOps, Microservices, Performance, Regression testing},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
On the one hand, the benefits of microservices for functional testing are often praised, as the focus on one functionality and their smaller granularity allow for more targeted and more convenient testing. On the other hand, using microservices has their consequences (both positive and negative) on other types of testing, such as performance testing. Performance testing is traditionally done by establishing the baseline performance of a software version, which is then used to compare the performance testing results of later software versions. However, as we show in this paper, establishing such a baseline performance is challenging in microservice applications.
In this paper, we discuss the benefits and challenges of microservices from a performance tester’s point of view. Through a series of experiments on the TeaStore application, we demonstrate how microservices affect the performance testing process, and we demonstrate that it is not straightforward to achieve reliable performance testing results for a microservice application.
