@comment{
  BB08: conference paper by Baggi and Ballis on a Haskell system for
  approximate filtering of XML documents. Typed as inproceedings with the
  proceedings name in booktitle so standard styles render it as a paper
  within a proceedings volume.
  NOTE(review): volume 216 presumably refers to a proceedings series volume;
  the series name is not recorded in this entry. Confirm and add a series
  field if known.
}
@inproceedings{BB08,
  author    = {Baggi, Michele and Ballis, Demis},
  title     = {A Lazy Implementation of a Language for Approximate Filtering of {XML} Documents},
  booktitle = {Proceedings of the 16th International Workshop on Functional and (Constraint) Logic Programming ({WFLP} 2007)},
  volume    = {216},
  pages     = {93--109},
  publisher = {Elsevier},
  year      = {2008},
  keywords  = {Approximate Filtering of XML, PHIL},
  abstract  = {In this paper, we introduce a system, written in Haskell, for filtering information from XML data. Essentially, the system implements a simple declarative language which allows one to extract relevant data as well as to exclude useless and misleading contents from an XML document by matching patterns against XML documents. The matching mechanism employs a cost-based pattern transformation algorithm which searches for patterns in an approximate way (i.e. modulo renaming, insertion, and deletion of XML items) and ranks the results w.r.t. their cost. In order to improve efficiency, the implementation uses sophisticated indexing techniques and exploits laziness to automatically avoid the construction of unnecessary data structures. We analyzed both the expressiveness of our filtering language and the performance of the system using the well known XMark benchmark suite.},
}

@comment{
  BBF08: conference paper by Baggi, Ballis and Falaschi extending the PHIL
  filtering language with ontology reasoning. Typed as inproceedings with
  the conference name in booktitle.
}
@inproceedings{BBF08,
  author    = {Baggi, Michele and Ballis, Demis and Falaschi, Moreno},
  title     = {{XML} Semantic Filtering via Ontology Reasoning},
  booktitle = {3rd International Conference on Internet and Web Applications and Services, {ICIW} 2008},
  pages     = {482--487},
  publisher = {IEEE Computer Society},
  year      = {2008},
  keywords  = {Approximate Filtering of XML, Description logic, DIG, OWL, PHIL, Semantic Filtering},
  abstract  = {In this paper, we present an extension of PHIL, a declarative language for filtering information from XML data. The proposed approach allows us to extract relevant data as well as to exclude useless and misleading contents from an XML document. Essentially, it combines ontology reasoning with an approximate pattern-matching engine which searches for patterns in a flexible way (i.e. modulo renaming, insertion, and deletion of XML items) and ranks the results w.r.t. their cost. The filtering process is guided by the syntax as well as the semantics of the XML documents, since it relies on both the document structure and the ontological information to which the document is related. Such information is retrieved by querying (possibly remote) ontology reasoners. Ontology reasoning capabilities are integrated into the filtering language via an adapted version of the DIG interface, which is a standard framework for describing description logic systems. Our extension to the DIG interface allows one to deal with non-ground ontology queries.},
}