We present an analysis and visualization method for computing what distinguishes a given document collection from others. We determine topics that discriminate a subset of collections from the remaining ones by applying probabilistic topic modeling and subsequently approximating the two relevant criteria, distinctiveness and characteristicness, algorithmically through a set of heuristics. Furthermore, we suggest a novel visualization method called DiTop-View, in which topics are represented by glyphs (topic coins) that are arranged on a 2D plane. Topic coins are designed to encode all information necessary for performing comparative analyses, such as the class membership of a topic, its most probable terms, and the discriminative relations. We evaluate our topic analysis using statistical measures and a small user experiment and present an expert case study with researchers from political science analyzing two real-world datasets.
% Journal article in Computer Graphics Forum 33(3), June 2014, pp. 201--210.
% Authors are stored in "Last, First" form joined by " and " so BibTeX parses
% five separate names; given names are kept as initials as in the source record.
% acmid, issue_date and numpages are non-standard fields carried over from the
% original export; standard styles ignore them.
@article{Oelke2014ComparativeExplorationDocument,
  author     = {Oelke, D. and Strobelt, H. and Rohrdantz, C. and Gurevych, I. and Deussen, O.},
  title      = {Comparative Exploration of Document Collections: A Visual Analytics Approach},
  journal    = {Computer Graphics Forum},
  volume     = {33},
  number     = {3},
  pages      = {201--210},
  month      = jun,
  year       = {2014},
  publisher  = {The Eurographics Association \& John Wiley \& Sons, Ltd.},
  address    = {Chichester, UK},
  issn       = {0167-7055},
  doi        = {10.1111/cgf.12376},
  url        = {http://graphics.uni-konstanz.de/publikationen/Oelke2014ComparativeExplorationDocument},
  keywords   = {Categories and Subject Descriptors according to ACM CCS:, H.5.m [Information Systems]: Information Interfaces and Presentation-Miscellaneous},
  acmid      = {2771516},
  issue_date = {June 2014},
  numpages   = {10},
}