This section lists all primary publications included in the secondary studies. The information is provided in BibTeX format.
@inproceedings{abadi_traceability_2008,
title = {A traceability technique for specifications},
abstract = {Traceability in software involves discovering links between different artifacts, and is useful for a myriad of tasks in the software life cycle. We compare several different Information Retrieval techniques for this task, across two datasets involving real-world software with the accompanying specifications and documentation. The techniques compared include dimensionality reduction methods, probabilistic and information theoretic approaches, and the standard vector space model.},
booktitle = {Proceedings of the 16th International Conference on Program Comprehension},
author = {Abadi, A. and Nisenson, M. and Simionovici, Y.},
year = {2008},
keywords = {links2code, primary},
pages = {103--112}
},
@inproceedings{ali_requirements_2011,
title = {Requirements traceability for object oriented systems by partitioning source code},
abstract = {Requirements traceability ensures that source code is consistent with documentation and that all requirements have been implemented. During software evolution, as features are added, removed, or modified, the code drifts away from its original requirements. Traceability recovery approaches thus become necessary to re-establish the traceability relations between requirements and source code. This paper presents an approach (Coparvo) complementary to existing traceability recovery approaches for object-oriented programs. Coparvo reduces false positive links recovered by traditional traceability recovery processes, thus reducing the manual validation effort. Coparvo assumes that information extracted from different entities (i.e., class names, comments, class variables, or method signatures) constitutes different information sources; these sources may have different levels of reliability in requirements traceability, and each information source may act as a different expert recommending traceability links. We applied Coparvo on three data sets, Pooka, {SIP} Communicator, and {iTrust}, to filter out false positive links recovered via the information retrieval approach, i.e., the vector space model. The results show that Coparvo significantly improves the accuracy of the recovered links and also reduces by up to 83\% the effort required to manually remove false positive links. © 2011 {IEEE}.},
booktitle = {Proceedings of the 18th Working Conference on Reverse Engineering},
author = {Ali, N. and Gueheneuc, Y. and Antoniol, G.},
year = {2011},
keywords = {primary},
pages = {45--54}
},
@inproceedings{ali_trust-based_2011,
title = {Trust-Based Requirements Traceability},
abstract = {Information retrieval ({IR}) approaches have proven useful in recovering traceability links between free text documentation and source code. {IR-based} traceability recovery approaches produce ranked lists of traceability links between pieces of documentation and source code. These traceability links are then pruned using various strategies and, finally, validated by human experts. In this paper we propose two contributions to improve the precision and recall of traceability links and, thus, reduce the required human experts' manual validation effort. First, we propose a novel approach, Trustrace, inspired by Web trust models to improve the precision and recall of traceability links: Trustrace uses any traceability recovery approach to obtain a set of traceability links, whose rankings are then re-evaluated using a set of other traceability recovery approaches. Second, we propose a novel traceability recovery approach, Histrace, to identify traceability links between requirements and source code through {CVS/SVN} change logs using a Vector Space Model ({VSM}). We combine a traditional traceability recovery approach with Histrace to build {TrustraceVSM,Histrace}, in which we use Histrace as one expert adding knowledge to the traceability links extracted from {CVS/SVN} change logs. We apply {TrustraceVSM,Histrace} on two case studies to compare its traceability links with those recovered using only the {VSM-based} approach, in terms of precision and recall. We show that {TrustraceVSM,Histrace} improves with statistical significance the precision of the traceability links while also improving recall, but without statistical significance.},
language = {English},
booktitle = {Proceedings of the 19th International Conference on Program Comprehension},
author = {Ali, N. and Gueheneuc, Y-G. and Antoniol, G.},
year = {2011},
keywords = {primary},
pages = {111--120}
},
@inproceedings{antoniol_tracing_2000,
title = {Tracing object-oriented code into functional requirements},
abstract = {Software system documentation is almost always expressed informally, in natural language and free text. Examples include requirement specifications, design documents, manual pages, system development journals, error logs and related maintenance reports. We propose an approach to establish and maintain traceability links between source code and free text documents. A premise of our work is that programmers use meaningful names for program items, such as functions, variables, types, classes, and methods. We believe that the application-domain knowledge that programmers process when writing the code is often captured by the mnemonics for identifiers; therefore, the analysis of these mnemonics can help to associate high level concepts with program concepts, and vice-versa. The approach is applied to software written in an object oriented language, namely Java, to trace classes to functional requirements},
language = {English},
booktitle = {Proceedings of the 8th International Workshop on Program Comprehension},
author = {Antoniol, G. and Canfora, G. and Casazza, G. and De Lucia, A. and Merlo, E.},
year = {2000},
keywords = {primary},
pages = {79--86}
},
@article{antoniol_recovering_2002,
title = {Recovering traceability links between code and documentation},
volume = {28},
abstract = {Software system documentation is almost always expressed informally in natural language and free text. Examples include requirement specifications, design documents, manual pages, system development journals, error logs, and related maintenance reports. We propose a method based on information retrieval to recover traceability links between source code and free text documents. A premise of our work is that programmers use meaningful names for program items, such as functions, variables, types, classes, and methods. We believe that the application-domain knowledge that programmers process when writing the code is often captured by the mnemonics for identifiers; therefore, the analysis of these mnemonics can help to associate high-level concepts with program concepts and vice-versa. We apply both a probabilistic and a vector space information retrieval model in two case studies to trace C++ source code onto manual pages and Java code to functional requirements. We compare the results of applying the two models, discuss the benefits and limitations, and describe directions for improvements.},
journal = {Transactions on Software Engineering},
author = {Antoniol, G. and Canfora, G. and Casazza, G. and De Lucia, A. and Merlo, E.},
year = {2002},
keywords = {links2code, primary},
pages = {970--983}
},
@inproceedings{antoniol_recovering_1999,
title = {Recovering code to documentation links in {OO} systems},
abstract = {Software system documentation is almost always expressed informally, in natural language and free text. Examples include requirement specifications, design documents, user manual pages, system development journals, error logs and related maintenance reports. We propose an approach to establish and maintain traceability links between the source code and free-text documents. A premise of our work is that programmers use meaningful names for program's items, such as functions, variables, types, classes and methods. We believe that the application domain knowledge that programmers process when writing the code is often captured by the mnemonics for identifiers; therefore, the analysis of these mnemonics can help to associate high-level concepts with program concepts, and vice versa. In this paper, the approach is applied to software written in an object-oriented ({OO)} language, namely C++, to trace classes to manual sections},
booktitle = {Proceedings of the 6th Working Conference on Reverse Engineering},
author = {Antoniol, G. and Canfora, G. and De Lucia, A. and Merlo, E.},
year = {1999},
keywords = {links2code, primary},
pages = {136--144}
},
@inproceedings{asuncion_software_2010,
title = {Software traceability with topic modeling},
abstract = {Software traceability is a fundamentally important task in software engineering. The need for automated traceability increases as projects become more complex and as the number of artifacts increases. We propose an automated technique that combines traceability with a machine learning technique known as topic modeling. Our approach automatically records traceability links during the software development process and learns a probabilistic topic model over artifacts. The learned model allows for the semantic categorization of artifacts and the topical visualization of the software system. To test our approach, we have implemented several tools: an artifact search tool combining keyword-based search and topic modeling, a recording tool that performs prospective traceability, and a visualization tool that allows one to navigate the software architecture and view semantic topics associated with relevant artifacts and architectural components. We apply our approach to several data sets and discuss how topic modeling enhances software traceability, and vice versa. © 2010 {ACM}.},
booktitle = {Proceedings of the International Conference on Software Engineering},
author = {Asuncion, H. and Asuncion, A. and Taylor, R.},
year = {2010},
keywords = {primary},
pages = {95--104}
},
@inproceedings{ben_charrada_towards_2011,
title = {Towards a benchmark for traceability},
abstract = {Rigorously evaluating and comparing traceability link generation techniques is a challenging task. In fact, traceability is still expensive to implement and it is therefore difficult to find a complete case study that includes both a rich set of artifacts and traceability links among them. Consequently, researchers usually have to create their own case studies by taking a number of existing artifacts and creating traceability links for them. There are two major issues related to the creation of one's own example. First, creating a meaningful case study is time consuming. Second, the created case usually covers a limited set of artifacts and has a limited applicability (e.g., a case with traces from high-level requirements to low-level requirements cannot be used to evaluate traceability techniques that are meant to generate links from documentation to source code). We propose a benchmark for traceability that includes all artifacts that are typically produced during the development of a software system and with end-to-end traceability linking. The benchmark is based on an irrigation system that was elaborated in a book about software design. The main task considered by the benchmark is the generation of traceability links among different types of software artifacts. Such a traceability benchmark will help advance research in this field because it facilitates the evaluation and comparison of traceability techniques and makes the replication of experiments an easy task. As a proof of concept we used the benchmark to evaluate the precision and recall of a link generation technique based on the vector space model. Our results are comparable to those obtained by other researchers using the same technique. © 2011 {ACM.}},
booktitle = {Proceedings of the 12th International Workshop on Principles of Software Evolution},
author = {Ben Charrada, E. and Caspar, D. and Jeanneret, C. and Glinz, M.},
year = {2011},
keywords = {primary},
pages = {21--30}
},
@inproceedings{borg_better_2011,
title = {Do better {IR} tools improve the accuracy of engineers' traceability recovery?},
abstract = {Large-scale software development generates an ever-growing amount of information. Multiple research groups have proposed using approaches from the domain of information retrieval ({IR)} to recover traceability. Several enhancement strategies have been initially explored using the laboratory model of {IR} evaluation for performance assessment. We conducted a pilot experiment using printed candidate lists from the tools {RETRO} and {ReqSimile} to investigate how different quality levels of tool output affect the tracing accuracy of engineers. Statistical testing of equivalence, commonly used in medicine, has been conducted to analyze the data. The low number of subjects in this pilot experiment resulted neither in statistically significant equivalence nor difference. While our results are not conclusive, there are indications that it is worthwhile to investigate further into the actual value of improving tool support for semi-automatic traceability recovery. For example, our pilot experiment showed that the effect size of using {RETRO} versus {ReqSimile} is of practical significance regarding precision and F-measure. The interpretation of the effect size regarding recall is less clear. The experiment needs to be replicated with more subjects and on varying tasks to draw firm conclusions.},
booktitle = {Proceedings of the International Workshop on Machine Learning Technologies in Software Engineering},
author = {Borg, M. and Pfahl, D.},
year = {2011},
keywords = {controlled experiment, equivalence testing, information retrieval, primary, requirements traceability},
pages = {27--34}
},
@inproceedings{canfora_fine_2006,
title = {Fine grained indexing of software repositories to support impact analysis},
abstract = {Versioned and bug-tracked software systems provide a huge amount of historical data regarding source code changes and issues management. In this paper we deal with impact analysis of a change request and show that data stored in software repositories are a good descriptor on how past change requests have been resolved. A fine grained analysis method of software repositories is used to index code at different levels of granularity, such as lines of code and source files, with free text contained in software repositories. The method exploits information retrieval algorithms to link the change request description and code entities impacted by similar past change requests. We evaluate such an approach on a set of three open-source projects.},
booktitle = {Proceedings of the International Workshop on Mining Software Repositories},
author = {Canfora, G. and Cerulo, L.},
year = {2006},
keywords = {primary},
pages = {105--111}
},
@inproceedings{capobianco_role_2009,
title = {On the Role of the Nouns in {IR-based} Traceability Recovery},
abstract = {The intensive human effort needed to manually manage traceability information has increased the interest in utilising semi-automated traceability recovery techniques. This paper presents a simple way to improve the accuracy of traceability recovery methods based on Information Retrieval techniques. The proposed method acts on the artefact indexing considering only the nouns contained in the artefact content to define the semantics of an artefact. The rationale behind such a choice is that the language used in software documents can be classified as a sectorial language, where the terms that provide more indication on the semantics of a document are the nouns. The results of a reported case study demonstrate that the proposed artefact indexing significantly improves the accuracy of traceability recovery methods based on the probabilistic or vector space based {IR} models.},
booktitle = {Proceedings of the 17th International Conference on Program Comprehension},
author = {Capobianco, G. and De Lucia, A. and Oliveto, R. and Panichella, A. and Panichella, S.},
year = {2009},
keywords = {primary},
pages = {148--157}
},
@inproceedings{capobianco_traceability_2009,
title = {Traceability Recovery using Numerical Analysis},
abstract = {The paper proposes a novel information retrieval technique based on numerical analysis for recovering traceability links between code and software documentation. The results of a reported case study demonstrate that the proposed approach significantly outperforms two vector-based {IR} models, i.e., the vector space model and latent semantic indexing, and it is comparable and sometimes better than a probabilistic model, i.e., the Jensen-Shannon method. The paper also discusses the influence of each method with the specific artifact type considered and the artifact language.},
booktitle = {Proceedings of the 16th Working Conference on Reverse Engineering},
author = {Capobianco, G. and De Lucia, A. and Oliveto, R. and Panichella, A. and Panichella, S.},
year = {2009},
keywords = {links2code, primary},
pages = {195--204}
},
@inproceedings{chen_extraction_2010,
title = {Extraction and visualization of traceability relationships between documents and source code},
abstract = {Traceability links between artifacts in a software system aid developers in comprehension, development, and effective management of the system. Traceability systems to date have faced difficulties in retrieving relationships between artifacts with high quality and accuracy, and in visualizing extracted relationships in a natural and intuitive way. This research aims to combine several traceability recovery techniques to make up for each other's weaknesses and extract relationships between artifacts with high accuracy and quality. Moreover, the recovered relationships are visualized in a hierarchical rich graphical tree that can be expanded and contracted to help users easily interact with these links and move easily between artifacts and their related artifacts and vice versa. Our preliminary evaluation demonstrated that integration of several traceability recovery techniques can improve the quality and accuracy of retrieved links. © 2010 {ACM}.},
booktitle = {Proceedings of the International Conference on Automated Software Engineering},
author = {Chen, X.},
year = {2010},
keywords = {links2code, primary},
pages = {505--509}
},
@inproceedings{chen_improving_2011,
title = {Improving automated documentation to code traceability by combining retrieval techniques},
abstract = {Documentation written in natural language and source code are two of the major artifacts of a software system. Tracking a variety of traceability links between software documentation and source code assists software developers in comprehension, efficient development, and effective management of a system. Automated traceability systems to date have been faced with a major open research challenge: how to extract these links with both high precision and high recall. In this paper we introduce an approach that combines three supporting techniques, Regular Expression, Key Phrases, and Clustering, with a Vector Space Model (VSM) to improve the performance of automated traceability between documents and source code. This combination approach takes advantage of strengths of the three techniques to ameliorate limitations of VSM. Four case studies have been used to evaluate our combined technique approach. Experimental results indicate that our approach improves the performance of VSM, increases the precision of retrieved links, and recovers more true links than VSM alone.},
booktitle = {Proceedings of the 26th International Conference on Automated Software Engineering},
author = {Chen, X. and Grundy, J.},
year = {2011},
keywords = {primary},
pages = {223--232}
},
@inproceedings{chen_combination_2011,
title = {A combination approach for enhancing automated traceability},
abstract = {Tracking a variety of traceability links between artifacts assists software developers in comprehension, efficient development, and effective management of a system. Traceability systems to date based on various Information Retrieval ({IR)} techniques have been faced with a major open research challenge: how to extract these links with both high precision and high recall. In this paper we describe an experimental approach that combines Regular Expression, Key Phrases, and Clustering with {IR} techniques to enhance the performance of {IR} for traceability link recovery between documents and source code. Our preliminary experimental results show that our combination technique improves the performance of {IR}, increases the precision of retrieved links, and recovers more true links than {IR} alone.},
booktitle = {Proceedings of the 33rd International Conference on Software Engineering ({NIER} track)},
author = {Chen, X. and Hosking, J. and Grundy, J.},
year = {2011},
keywords = {primary},
pages = {912--915}
},
@inproceedings{cleland-huang_machine_2010,
title = {A machine learning approach for tracing regulatory codes to product specific requirements},
abstract = {Regulatory standards, designed to protect the safety, security, and privacy of the public, govern numerous areas of software intensive systems. Project personnel must therefore demonstrate that an as-built system meets all relevant regulatory codes. Current methods for demonstrating compliance rely either on after-the-fact audits, which can lead to significant refactoring when regulations are not met, or else require analysts to construct and use traceability matrices to demonstrate compliance. Manual tracing can be prohibitively time-consuming; however, automated trace retrieval methods are not very effective due to the vocabulary mismatches that often occur between regulatory codes and product level requirements. This paper introduces and evaluates two machine-learning methods, designed to improve the quality of traces generated between regulatory codes and product level requirements. The first approach uses manually created traceability matrices to train a trace classifier, while the second approach uses web-mining techniques to reconstruct the original trace query. The techniques were evaluated against security regulations from the {USA} government's Health Insurance Privacy and Portability Act ({HIPAA}) traced against ten healthcare related requirements specifications. Results demonstrated improvements for the subset of {HIPAA} regulations that exhibited high fan-out behavior across the requirements datasets. © 2010 {ACM}.},
booktitle = {Proceedings of the International Conference on Software Engineering},
author = {Cleland-Huang, J. and Czauderna, A. and Gibiec, M. and Emenecker, J.},
year = {2010},
keywords = {primary},
pages = {155--164}
},
@inproceedings{cleland-huang_utilizing_2005,
title = {Utilizing supporting evidence to improve dynamic requirements traceability},
abstract = {Requirements traceability provides critical support throughout all phases of a software development project. However practice has repeatedly shown the difficulties involved in long term maintenance of traditional traceability matrices. Dynamic retrieval methods minimize the need for creating and maintaining explicit links and can significantly reduce the effort required to perform a manual trace. Unfortunately they suffer from recall and precision problems. This paper introduces three strategies for incorporating supporting information into a probabilistic retrieval algorithm in order to improve the performance of dynamic requirements traceability. The strategies include hierarchical modeling, logical clustering of artifacts, and semi-automated pruning of the probabilistic network. Experimental results indicate that enhancement strategies can be used effectively to improve trace retrieval results thereby increasing the practicality of utilizing dynamic trace retrieval methods.},
booktitle = {Proceedings of the 13th International Conference on Requirements Engineering},
author = {Cleland-Huang, J. and Settimi, R. and Duan, C. and Zou, X. C.},
year = {2005},
keywords = {primary},
pages = {135--144}
},
@article{cleland-huang_best_2007,
title = {Best Practices for Automated Traceability},
volume = {40},
abstract = {Automated traceability applies information-retrieval techniques to generate candidate links, sharply reducing the effort of manual approaches to build and maintain a requirements trace matrix as well as providing after-the-fact traceability in legacy documents. The authors describe nine best practices for implementing effective automated traceability.},
number = {6},
journal = {Computer},
author = {Cleland-Huang, J. and Settimi, R. and Romanova, E. and Berenbach, B. and Clark, S.},
year = {2007},
keywords = {primary},
pages = {27--35}
},
@inproceedings{cuddeback_automated_2010,
title = {Automated requirements traceability: The study of human analysts},
abstract = {The requirements traceability matrix ({RTM}) supports many software engineering and software verification and validation ({V\&V}) activities such as change impact analysis, reverse engineering, reuse, and regression testing. The generation of {RTMs} is tedious and error-prone, though, thus {RTMs} are often not generated or maintained. Automated techniques have been developed to generate candidate {RTMs} with some success. When using {RTMs} to support the {V\&V} of mission- or safety-critical systems, however, a human analyst must vet the candidate {RTMs}. The focus thus becomes the quality of the final {RTM}. This paper investigates how human analysts perform when vetting candidate {RTMs}. Specifically, a study was undertaken at two universities and had 26 participants analyze {RTMs} of varying accuracy for a Java code formatter program. The study found that humans tend to move their candidate {RTM} toward the line that represents recall = precision. Participants who examined {RTMs} with low recall and low precision drastically improved both. © 2010 {IEEE}.},
booktitle = {Proceedings of the 18th International Requirements Engineering Conference},
author = {Cuddeback, D. and Dekhtyar, A. and Huffman Hayes, J.},
year = {2010},
keywords = {primary},
pages = {231--240}
},
@inproceedings{czauderna_traceability_2011,
title = {Traceability challenge 2011: Using {TraceLab} to evaluate the impact of local versus global idf on trace retrieval},
abstract = {Numerous trace retrieval algorithms incorporate the standard tf-idf (term frequency, inverse document frequency) technique to weight various terms. In this paper we address Grand Challenge C-{GC1} by comparing the effectiveness of computing idf based only on the local terms in the query, versus computing it based on general term usage as documented in the American National Corpus. We also address Grand Challenges L-{GC1} and L-{GC2} by setting ourselves the additional task of designing and conducting the experiments using the alpha-release of {TraceLab.} {TraceLab} is an experimental workbench which allows researchers to graphically model and execute a traceability experiment as a workflow of components. Results of the experiment show that the local idf approach exceeds or matches the global approach in all of the cases studied.},
booktitle = {Proceedings of the 6th International Workshop on Traceability in Emerging Forms of Software Engineering},
author = {Czauderna, A. and Gibiec, M. and Leach, G. and Li, Y. and Shin, Y. and Keenan, E. and Cleland-Huang, J.},
year = {2011},
keywords = {primary},
pages = {75--78}
},
@inproceedings{de_lucia_improving_2011,
title = {Improving {IR-based} Traceability Recovery Using Smoothing Filters},
abstract = {Information Retrieval methods have been largely adopted to identify traceability links based on the textual similarity of software artifacts. However, noise due to word usage in software artifacts might negatively affect the recovery accuracy. We propose the use of smoothing filters to reduce the effect of noise in software artifacts and improve the performances of traceability recovery methods. An empirical evaluation performed on two repositories indicates that the usage of a smoothing filter is able to significantly improve the performances of Vector Space Model and Latent Semantic Indexing. Such a result suggests that other than being used for traceability recovery the proposed filter can be used to improve performances of various other software engineering approaches based on textual analysis.},
language = {English},
booktitle = {Proceedings of the 19th International Conference on Program Comprehension},
author = {De Lucia, A. and Di Penta, M. and Oliveto, R. and Panichella, A. and Panichella, S.},
year = {2011},
keywords = {primary},
pages = {21--30}
},
@inproceedings{de_lucia_enhancing_2004,
title = {Enhancing an artefact management system with traceability recovery features},
abstract = {We present a traceability recovery method and tool based on Latent Semantic Indexing ({LSI)} in the context of an artefact management system. The tool highlights the candidate links not identified yet by the software engineer and the links identified but missed by the tool, probably due to inconsistencies in the usage of domain terms in the traced software artefacts. We also present a case study of using the traceability recovery tool on software artefacts belonging to different categories of documents, including requirement, design, and testing documents, as well as code components.},
booktitle = {Proceedings of the 20th International Conference on Software Maintenance},
author = {De Lucia, A. and Fasano, F. and Oliveto, R. and Tortora, G.},
year = {2004},
keywords = {primary},
pages = {306--315}
},
@inproceedings{de_lucia_adams_2005,
title = {{ADAMS} re-trace: A traceability recovery tool},
abstract = {We present the traceability recovery tool developed in the {ADAMS} artefact management system. The tool is based on an Information Retrieval technique, namely Latent Semantic Indexing and aims at supporting the software engineer in the identification of the traceability links between artefacts of different types. We also present a case study involving seven student projects which represented an ideal workbench for the tool. The results emphasise the benefits provided by the tool in terms of new traceability links discovered, in addition to the links manually traced by the software engineer. Moreover, the tool was also helpful in identifying cases of lack of similarity between artefacts manually traced by the software engineer, thus revealing inconsistencies in the usage of domain terms in these artefacts. This information is valuable to assess the quality of the produced artefacts.},
booktitle = {Proceedings of the 9th European Conference on Software Maintenance and Reengineering},
author = {De Lucia, A. and Fasano, F. and Oliveto, R. and Tortora, G.},
year = {2005},
keywords = {primary},
pages = {32--41}
},
@article{de_lucia_recovering_2007,
title = {Recovering traceability links in software artifact management systems using information retrieval methods},
volume = {16},
abstract = {The main drawback of existing software artifact management systems is the lack of automatic or semi-automatic traceability link generation and maintenance. We have improved an artifact management system with a traceability recovery tool based on Latent Semantic Indexing ({LSI}), an information retrieval technique. We have assessed {LSI} to identify strengths and limitations of using information retrieval techniques for traceability recovery and devised the need for an incremental approach. The method and the tool have been evaluated during the development of seventeen software projects involving about 150 students. We observed that although tools based on information retrieval provide a useful support for the identification of traceability links during software development, they are still far from supporting a complete semi-automatic recovery of all links. The results of our experience have also shown that such tools can help to identify quality problems in the textual description of traced artifacts. © 2007 {ACM}.},
number = {4},
journal = {Transactions on Software Engineering and Methodology},
author = {De Lucia, A. and Fasano, F. and Oliveto, R. and Tortora, G.},
year = {2007},
keywords = {primary}
},
@inproceedings{de_lucia_incremental_2006,
title = {Incremental approach and user feedbacks: A silver bullet for traceability recovery?},
abstract = {Several authors apply Information Retrieval ({IR}) techniques to recover traceability links between software artefacts. Recently, the use of user feedbacks (in terms of classification of retrieved links as correct or false positives) has been proposed to improve the retrieval performances of these techniques. In this paper we present a critical analysis of using feedbacks within an incremental traceability recovery process. In particular, we analyse the trade-off between the improvement of the performances and the link classification effort required to train the {IR-based} traceability recovery tool. We also present the results achieved in case studies and show that even though the retrieval performances generally improve with the use of feedbacks, {IR-based} approaches are still far from solving the problem of recovering all correct links with a low classification effort. © 2006 {IEEE}.},
booktitle = {Proceedings of the International Conference on Software Maintenance},
author = {De Lucia, A. and Oliveto, R. and Sgueglia, P.},
year = {2006},
keywords = {primary},
pages = {299--308}
},
@inproceedings{de_lucia_ir-based_2008,
title = {{IR-based} traceability recovery processes: An empirical comparison of "one-shot" and incremental processes},
abstract = {We present the results of a controlled experiment aiming at analysing the role played by the approach adopted during an {IR-based} traceability recovery process. In particular, we compare the tracing performances achieved by subjects using the "one-shot" process, where the full ranked list of candidate links is proposed, and the incremental process, where a similarity threshold is used to cut the ranked list and the links are classified step-by-step. The analysis of the achieved results shows that, in general, the incremental process improves the tracing accuracy and reduces the effort to analyse the proposed links. © 2008 {IEEE}.},
booktitle = {Proceedings of the 23rd International Conference on Automated Software Engineering},
author = {De Lucia, A. and Oliveto, R. and Tortora, G.},
year = {2008},
keywords = {primary},
pages = {39--48}
},
@article{de_lucia_assessing_2009,
title = {Assessing {IR-based} traceability recovery tools through controlled experiments},
volume = {14},
abstract = {We report the results of a controlled experiment and a replication performed with different subjects, in which we assessed the usefulness of an Information Retrieval-based traceability recovery tool during the traceability link identification process. The main result achieved in the two experiments is that the use of a traceability recovery tool significantly reduces the time spent by the software engineer with respect to manual tracing. Replication with different subjects allowed us to investigate if subjects' experience and ability play any role in the traceability link identification process. In particular, we made some observations concerning the retrieval accuracy achieved by the software engineers with and without the tool support and with different levels of experience and ability. © 2008 Springer {Science+Business} Media, {LLC}.},
number = {1},
journal = {Empirical Software Engineering},
author = {De Lucia, A. and Oliveto, R. and Tortora, G.},
year = {2009},
keywords = {primary},
pages = {57--92}
},
@inproceedings{de_lucia_role_2009,
title = {The Role of the Coverage Analysis during {IR-based} Traceability Recovery: a Controlled Experiment},
abstract = {This paper presents a two-step process aiming at improving the tracing performances of the software engineer when using an {IR-based} traceability recovery tool. In the first step the software engineer performs an incremental coarse-grained traceability recovery between a set of source artefacts and a set of target artefacts. During this step he/she traces as many links as possible while keeping the effort to discard false positives low. In the second step he/she uses a coverage link analysis aiming at identifying source artefacts poorly traced and guiding focused fine-grained traceability recovery sessions to recover links missed in the first step. The results achieved in a reported controlled experiment demonstrate that the proposed approach significantly increases the amount of correct links traced by the software engineer with respect to a traditional process.},
booktitle = {Proceedings of the International Conference on Software Maintenance},
author = {De Lucia, A. and Oliveto, R. and Tortora, G.},
year = {2009},
keywords = {primary},
pages = {371--380}
},
@inproceedings{dekhtyar_human_2011,
title = {On human analyst performance in assisted requirements tracing: Statistical analysis},
shorttitle = {On human analyst performance in assisted requirements tracing},
abstract = {Assisted requirements tracing is a process in which a human analyst validates candidate traces produced by an automated requirements tracing method or tool. The assisted requirements tracing process splits the difference between the commonly applied time-consuming, tedious, and error-prone manual tracing and the automated requirements tracing procedures that are a focal point of academic studies. In fact, in software assurance scenarios, assisted requirements tracing is the only way in which tracing can be at least partially automated. In this paper, we present the results of an extensive 12 month study of assisted tracing, conducted using three different tracing processes at two different sites. We describe the information collected about each study participant and their work on the tracing task, and apply statistical analysis to study which factors have the largest effect on the quality of the final trace. © 2011 {IEEE.}},
booktitle = {Proceedings of the 19th International Requirements Engineering Conference},
author = {Dekhtyar, A. and Dekhtyar, O. and Holden, J. and Huffman Hayes, J. and Cuddeback, D. and Kong, W.},
year = {2011},
keywords = {primary},
pages = {111--120}
},
@inproceedings{dekhtyar_make_2007,
title = {Make the most of your time: How should the analyst work with automated traceability tools?},
abstract = {Several recent studies employed traditional information retrieval ({IR}) methods to assist in the mapping of elements of software engineering artifacts to each other. This activity is referred to as candidate link generation because the final say in determining the final mapping belongs to the human analyst. Feedback techniques that utilize information from the analyst (on whether the candidate links are correct or not) have been shown to improve the quality of the mappings. Yet the analyst is making an investment of time in providing the feedback. This leads to the question of whether or not guidance can be provided to the analyst on how to best utilize that time. This paper simulates a number of approaches an analyst might take to evaluating the same candidate link list, and discovers that more structured and organized approaches appear to save time/effort of the analyst. © 2007 {IEEE}.},
booktitle = {Proceedings of the 3rd International Workshop on Predictor Models in Software Engineering},
author = {Dekhtyar, A. and Huffman Hayes, J. and Larsen, J.},
year = {2007},
keywords = {primary}
},
@inproceedings{dekhtyar_technique_2007,
title = {Technique integration for requirements assessment},
abstract = {In determining whether to permit a safety-critical software system to be certified and in performing independent verification and validation ({IV\&V}) of safety- or mission-critical systems, the requirements traceability matrix ({RTM}) delivered by the developer must be assessed for accuracy. The current state of the practice is to perform this work manually, or with the help of general-purpose tools such as word processors and spreadsheets. Such work is error-prone and person-power intensive. In this paper, we extend our prior work in application of Information Retrieval ({IR}) methods for candidate link generation to the problem of {RTM} accuracy assessment. We build voting committees from five {IR} methods, and use a variety of voting schemes to accept or reject links from given candidate {RTMs}. We report on the results of two experiments. In the first experiment, we used 25 candidate {RTMs} built by human analysts for a small tracing task involving a portion of a {NASA} scientific instrument specification. In the second experiment, we randomly seeded faults in the {RTM} for the entire specification. Results of the experiments are presented. © 2007 {IEEE}.},
booktitle = {Proceedings of the 15th International Requirements Engineering Conference},
author = {Dekhtyar, A. and Huffman Hayes, J. and Sundaram, S. and Holbrook, A. and Dekhtyar, O.},
year = {2007},
keywords = {primary},
pages = {141--152}
},
@inproceedings{di_improving_2009,
title = {An Improving Approach for Recovering Requirements-to-Design Traceability Links},
abstract = {Requirement tracing is an important activity because it supports effective system quality assurance, impact analysis of changes, and software maintenance. In this paper, we propose an automatic approach called {LGRTL} to recover traceability links between high-level requirements and low-level design elements. This approach treats the recovery process as a Bayesian classification process. Meanwhile, we add a synonym process to the preprocessing phase, and improve the Bayesian model to perform better. To evaluate the validity of the method, we perform a case study, and the experimental results show that our method can enhance the effect to a certain extent.},
language = {English},
booktitle = {Proceedings of the International Conference on Computational Intelligence and Software Engineering},
author = {Di, F. and Zhang, M.},
year = {2009},
keywords = {primary},
pages = {1--6}
},
@inproceedings{di_penta_traceability_2002,
title = {Traceability recovery in {RAD} software systems},
abstract = {This paper proposes an approach and a process to recover traceability links between source code and free text documents in software systems developed with extensive use of {COTS}, middleware, and automatically generated code. The approach relies on a process to filter information gathered from low level artifacts. Information filtering was performed according to a taxonomy of factors affecting traceability link recovery methods. Those factors stem directly from rapid software development techniques. The approach was applied to recover traceability links from an industrial software system, developed with {RAD} techniques and tools, and making use of {COTS} (e.g., database access components), automatically generated code (e.g., via {GUI} builders and report generators), and middleware (i.e., {CORBA}). Results are presented, along with lessons learned.},
booktitle = {Proceedings of the 10th International Workshop on Program Comprehension},
author = {Di Penta, M. and Gradara, S. and Antoniol, G.},
year = {2002},
keywords = {links2code, primary},
pages = {207--216}
},
@inproceedings{duan_clustering_2007,
title = {Clustering support for automated tracing},
abstract = {Automated trace tools dynamically generate links between various software artifacts such as requirements, design elements, code, test cases, and other less structured supplemental documents. Trace algorithms typically utilize information retrieval methods to compute similarity scores between pairs of artifacts. Results are returned to the user as a ranked set of candidate links, and the user is then required to evaluate the results through performing a top-down search through the list. Although clustering methods have previously been shown to improve the performance of information retrieval algorithms by increasing understandability of the results and minimizing human analysis effort, their usefulness in automated traceability tools has not yet been explored. This paper evaluates and compares the effectiveness of several existing clustering methods to support traceability; describes a technique for incorporating them into the automated traceability process; and proposes new techniques based on the concepts of theme cohesion and coupling to dynamically identify optimal clustering granularity and to detect cross-cutting concerns that would otherwise remain undetected by standard clustering algorithms. The benefits of utilizing clustering in automated trace retrieval are then evaluated through a case study. Copyright 2007 {ACM.}},
booktitle = {Proceedings of the International Conference on Automated Software Engineering},
author = {Duan, C. and Cleland-Huang, J.},
year = {2007},
keywords = {primary},
pages = {244--253}
},
@inproceedings{gethers_adaptive_2011,
title = {An adaptive approach to impact analysis from change requests to source code},
abstract = {The paper presents an adaptive approach to perform impact analysis from a given change request (e.g., a bug report) to source code. Given a textual change request, a single snapshot (release) of source code, indexed using Latent Semantic Indexing, is used to estimate the impact set. Additionally, the approach configures the best-fit combination of information retrieval, dynamic analysis, and data mining of past source code commits to produce an improved impact set. The tandem operation of the three techniques sets it apart from other related solutions. © 2011 {IEEE.}},
booktitle = {Proceedings of the 26th International Conference on Automated Software Engineering},
author = {Gethers, M. and Kagdi, H. and Dit, B. and Poshyvanyk, D.},
year = {2011},
keywords = {primary},
pages = {540--543}
},
@inproceedings{gethers_integrating_2011,
title = {On integrating orthogonal information retrieval methods to improve traceability recovery},
abstract = {Different Information Retrieval ({IR)} methods have been proposed to recover traceability links among software artifacts. Until now there is no single method that sensibly outperforms the others, however, it has been empirically shown that some methods recover different, yet complementary traceability links. In this paper, we exploit this empirical finding and propose an integrated approach to combine orthogonal {IR} techniques, which have been statistically shown to produce dissimilar results. Our approach combines the following {IR-based} methods: Vector Space Model ({VSM)}, probabilistic Jensen and Shannon ({JS)} model, and Relational Topic Modeling ({RTM)}, which has not been used in the context of traceability link recovery before. The empirical case study conducted on six software systems indicates that the integrated method outperforms stand-alone {IR} methods as well as any other combination of non-orthogonal methods with a statistically significant margin. © 2011 {IEEE.}},
booktitle = {Proceedings of the International Conference on Software Maintenance},
author = {Gethers, M. and Oliveto, R. and Poshyvanyk, D. and De Lucia, A.},
year = {2011},
keywords = {primary},
pages = {133--142}
},
@inproceedings{gibiec_towards_2010,
title = {Towards mining replacement queries for hard-to-retrieve traces},
abstract = {Automated trace retrieval methods can significantly reduce the cost and effort needed to create and maintain requirements traces. However, the set of generated traces is generally quite imprecise and must be manually evaluated by analysts. In applied settings when the retrieval algorithm is unable to find the relevant links for a given query, a human user can improve the trace results by manually adding additional search terms and filtering out unhelpful ones. However, the effectiveness of this approach is largely dependent upon the knowledge of the user. In this paper we present an automated technique for replacing the original query with a new set of query terms. These query terms are learned through seeding a web-based search with the original query and then processing the results to identify a set of domain-specific terms. The query-mining algorithm was evaluated and fine-tuned using security regulations from the {USA} government's Health Insurance Privacy and Portability Act ({HIPAA}) traced against ten healthcare related requirements specifications. © 2010 {ACM}.},
booktitle = {Proceedings of the International Conference on Automated Software Engineering},
author = {Gibiec, M. and Czauderna, A. and Cleland-Huang, J.},
year = {2010},
keywords = {primary},
pages = {245--254}
},
@inproceedings{huffman_hayes_improving_2003,
title = {Improving requirements tracing via information retrieval},
abstract = {We present an approach for improving requirements tracing based on framing it as an information retrieval ({IR)} problem. Specifically, we focus on improving recall and precision in order to reduce the number of missed traceability links as well as to reduce the number of irrelevant potential links that an analyst has to examine when performing requirements tracing. Several {IR} algorithms were adapted and implemented to address this problem. We evaluated our algorithms by comparing their results and performance to those of a senior analyst who traced manually as well as with an existing requirements tracing tool. Initial results suggest that we can retrieve a significantly higher percentage of the links than analysts, even when using existing tools, and do so in much less time while achieving comparable signal-to-noise levels.},
booktitle = {Proceedings of the 11th International Requirements Engineering Conference},
author = {Huffman Hayes, J. and Dekhtyar, A. and Osborne, J.},
year = {2003},
keywords = {primary},
pages = {138--147}
},
@inproceedings{huffman_hayes_text_2005,
title = {Text mining for software engineering: how analyst feedback impacts final results},
abstract = {The mining of textual artifacts is requisite for many important activities in software engineering: tracing of requirements; retrieval of components from a repository; location of manpage text for an area of question, etc. Many such activities leave the "final word" to the analyst --- have the relevant items been retrieved? are there other items that should have been retrieved? When analysts become a part of the text mining process, their decisions on the relevance of retrieved elements impact the final outcome of the activity. In this paper, we undertook a pilot study to examine the impact of analyst decisions on the final outcome of a task.},
booktitle = {Proceedings of the International Workshop on Mining Software Repositories},
author = {Huffman Hayes, J. and Dekhtyar, A. and Sundaram, S.},
year = {2005},
keywords = {primary},
pages = {1--5}
},
@article{huffman_hayes_advancing_2006,
title = {Advancing candidate link generation for requirements tracing: the study of methods},
volume = {32},
abstract = {This paper addresses the issues related to improving the overall quality of the dynamic candidate link generation for the requirements tracing process for Verification and Validation and Independent Verification and Validation analysts. The contribution of the paper is four-fold: We define goals for a tracing tool based on analyst responsibilities in the tracing process, we introduce several new measures for validating that the goals have been satisfied, we implement analyst feedback in the tracing process, and we present a prototype tool that we built, {RETRO} ({REquirements} {TRacing} On-target), to address these goals. We also present the results of a study used to assess {RETRO’s} support of goals and goal elements that can be measured objectively.},
number = {1},
journal = {IEEE Transactions on Software Engineering},
author = {Huffman Hayes, J. and Dekhtyar, A. and Sundaram, S.},
year = {2006},
keywords = {primary},
pages = {4--19}
},
@article{huffman_hayes_requirements_2007,
title = {{REquirements} {TRacing} On target ({RETRO):} Improving software maintenance through traceability recovery},
volume = {3},
abstract = {A number of important tasks in software maintenance require an up-to-date requirements traceability matrix ({RTM):} change impact analysis, determination of test cases to execute for regression testing, etc. The generation and maintenance of {RTMs} are tedious and error-prone, and they are hence often not done. In this paper, we present {REquirements} {TRacing} On-target ({RETRO)}, a special-purpose requirements tracing tool. We discuss how {RETRO} automates the generation of {RTMs} and present the results of a study comparing manual {RTM} generation to {RTM} generation using {RETRO.} The study showed that {RETRO} found significantly more correct links than manual tracing and took only one third of the time to do so. © Springer-Verlag London Limited 2007.},
number = {3},
journal = {Innovations in Systems and Software Engineering},
author = {Huffman Hayes, J. and Dekhtyar, A. and Sundaram, S. and Holbrook, A. and Vadlamudi, S. and April, A.},
year = {2007},
keywords = {primary},
pages = {193--202}
},
@inproceedings{huffman_hayes_helping_2004,
title = {Helping analysts trace requirements: An objective look},
abstract = {This paper addresses the issues related to improving the overall quality of the requirements tracing process for Independent Verification and Validation analysts. The contribution of the paper is three-fold: we define requirements for a tracing tool based on analyst responsibilities in the tracing process; we introduce several new measures for validating that the requirements have been satisfied; and we present a prototype tool that we built, {RETRO} ({REquirements} {TRacing} On-target) to address these requirements. We also present the results of a study used to assess {RETRO's} support of requirements and requirement elements that can be measured objectively. © 2004 {IEEE.}},
booktitle = {Proceedings of the International Conference on Requirements Engineering},
author = {Huffman Hayes, J. and Dekhtyar, A. and Sundaram, S. and Howard, S.},
year = {2004},
keywords = {primary},
pages = {249--259}
},
@inproceedings{huffman_hayes_software_2011,
title = {Software verification and validation research laboratory ({SVVRL)} of the University of Kentucky: traceability challenge 2011: language translation},
abstract = {We present the process and methods applied in undertaking the Traceability Challenge in addressing Grand Challenge C-{GC1} - Trace recovery. The Information Retrieval methods implemented in {REquirements} {TRacing} On target {.NET} ({RETRO.NET)} were applied to the tracing of the {eTour} and {EasyClinic} datasets. Our work focused on the nuances of native language (Italian, English). Datasets were augmented with additional terms derived from splitting function and variable names with Camel-Back notation and using the Google Translate {API} to translate Italian terms into English. Results based on the provided answer set show that the augmented datasets significantly improved recall and precision for one of the datasets.},
booktitle = {Proceedings of the 6th International Workshop on Traceability in Emerging Forms of Software Engineering},
publisher = {{ACM}},
author = {Huffman Hayes, J. and Sultanov, H. and Kong, W. and Li, W.},
year = {2011},
keywords = {primary},
pages = {50--53},
},
@inproceedings{jiang_incremental_2008,
title = {Incremental latent semantic indexing for automatic traceability link evolution management},
abstract = {Maintaining traceability links among software artifacts is particularly important for many software engineering tasks. Even though automatic traceability link recovery tools are successful in identifying the semantic connections among software artifacts produced during software development, no existing traceability link management approach can effectively and automatically deal with software evolution. We propose a technique to automatically manage traceability link evolution and update the links in evolving software. Our novel technique, called incremental Latent Semantic Indexing ({iLSI)}, allows for the fast and low-cost {LSI} computation for the update of traceability links by analyzing the changes to software artifacts and by reusing the result from the previous {LSI} computation before the changes. We present our {iLSI} technique, and describe a complete automatic traceability link evolution management tool, {TLEM}, that is capable of interactively and quickly updating traceability links in the presence of evolving software artifacts. We report on our empirical evaluation with various experimental studies to assess the performance and usefulness of our approach. © 2008 {IEEE.}},
booktitle = {Proceedings of the 23rd International Conference on Automated Software Engineering},
author = {Jiang, H. and Nguyen, T. and Chen, I. and Jaygarl, H. and Chang, C.},
year = {2008},
keywords = {links2code, primary},
pages = {59--68}
},
@inproceedings{kaushik_reconstructing_2011,
title = {Reconstructing traceability between bugs and test cases: An experimental study},
shorttitle = {Reconstructing traceability between bugs and test cases},
abstract = {In manual testing, testers typically follow the steps listed in the bug report to verify whether a bug has been fixed or not. Depending on time and availability of resources, a tester may execute some additional test cases to ensure test coverage. In the case of manual testing, the process of finding the most relevant manual test cases to run is largely manual and involves tester expertise. From a usability standpoint, the task of finding the most relevant test cases is tedious as the tester typically has to switch between the defect management tool and the test case management tool in order to search for test cases relevant to the bug at hand. In this paper, we use {IR} techniques to recover traceability between bugs and test cases with the aim of recommending test cases for bugs. We report on our experience of recovering traceability between bugs and test cases using techniques such as Latent Semantic Indexing ({LSI)} and Latent Dirichlet Allocation ({LDA)} through a small industrial case study. © 2011 {IEEE.}},
booktitle = {Proceedings of the Working Conference on Reverse Engineering},
author = {Kaushik, N. and Tahvildari, L. and Moore, M.},
year = {2011},
keywords = {primary},
pages = {411--414},
},
@inproceedings{klock_traceclipse:_2011,
title = {Traceclipse: an eclipse plug-in for traceability link recovery and management},
shorttitle = {Traceclipse},
abstract = {Traceability link recovery is an active research area in software engineering with a number of open research questions and challenges, due to the substantial costs and challenges associated with software maintenance. We propose Traceclipse, an Eclipse plug-in that integrates some similar characteristics of traceability link recovery techniques in one easy-to-use suite. The tool enables software developers to specify, view, and manipulate traceability links within Eclipse and it provides an {API} through which recovery techniques may be added, specified, and run within an integrated development environment. The paper also presents initial case studies aimed at evaluating the proposed plug-in.},
booktitle = {Proceedings of the 6th International Workshop on Traceability in Emerging Forms of Software Engineering},
author = {Klock, S. and Gethers, M. and Dit, B. and Poshyvanyk, D.},
year = {2011},
keywords = {primary},
pages = {24--30},
},
@inproceedings{kong_requirement_2009,
title = {A requirement traceability refinement method based on relevance feedback},
abstract = {In this paper, we conduct a study of using relevance feedback-based Information Retrieval ({IR)} methods to refine Requirement Traceability ({RT)} from requirement to code. We compare two representative feedback methods: Mixture Model ({MM)} in language model and Standard Rocchio method ({SR)} in vector-space model. In order to assure the fairness of comparison, we also make modifications to both methods. Initial experiment results on a real project data set show that 1) few iterations of feedback result in significant increases both in precision and recall; 2) feedback methods in language model are generally more stable than methods in vector-space model in improving precision, but the latter is more effective and can get better precision; 3) negative feedback information plays an important role in refining requirement traceability.},
booktitle = {Proceedings of the 21st International Conference on Software Engineering and Knowledge Engineering},
author = {Kong, L. and Li, J. and Li, Y. and Yang, Y. and Wang, Q.},
year = {2009},
keywords = {primary}
},
@inproceedings{kong_proximity-based_2011,
title = {Proximity-based traceability: An empirical validation using ranked retrieval and set-based measures},
shorttitle = {Proximity-based traceability},
abstract = {The automatic generation of traceability links attempts to reduce the burden of building requirements traceability matrices ({RTMs)} that will be vetted by a human analyst before use in verification and validation tasks such as criticality assessment or change impact analysis. Information Retrieval ({IR)} techniques, notably the Vector Space Model ({VSM)}, have been used with some success to build textual artifact traceability matrices. A limitation of the {VSM} is that it disregards word or term location and the relationship between words in the textual artifacts being traced. This paper presents a {VSM} enhancement with consideration for term location, validating it on four datasets using ranked retrieval and set-based measures. These two types of measures provide a more detailed comparison between the two traceability techniques. © 2011 {IEEE.}},
booktitle = {Proceedings of the 1st International Workshop on Empirical Requirements Engineering},
author = {Kong, W. and Huffman Hayes, J.},
year = {2011},
keywords = {primary},
pages = {45--52},
},
@inproceedings{kong_how_2011,
title = {How do we trace requirements: an initial study of analyst behavior in trace validation tasks},
abstract = {Traceability recovery is a tedious, error-prone, person-power intensive task, even if aided by automated traceability tools. Human analysts must vet candidate traceability links retrieved by such tools and must often go looking for links that such tools fail to locate as they build a traceability matrix. This paper examines a research version of the traceability tool {REquirements} {TRacing} On target ({RETRO)} that logs analyst actions. We examine the user logs in order to understand how analysts work on traceability recovery tasks. Such information is a pre-requisite to understanding how to better design traceability tools to best utilize analyst time while developing a high quality final traceability matrix.},
booktitle = {Proceedings of the 4th International Workshop on Cooperative and Human Aspects of Software Engineering},
author = {Kong, W. and Huffman Hayes, J. and Dekhtyar, A. and Holden, J.},
year = {2011},
keywords = {primary},
pages = {32--39},
},
@inproceedings{leuser_challenges_2009,
title = {Challenges for semi-automatic trace recovery in the automotive domain},
abstract = {This position paper describes the challenges of semi-automatically recovering traceability information in specifications of the automotive domain using information retrieval techniques. Recovering traceability information itself is not an easy task, and a combination of factors makes this task even harder in the automotive domain. For example, the specifications are huge compared to the specifications evaluated in research, and they are written in German instead of the English generally reported, which makes it even more complicated. Improving the applied algorithms is one way of tackling these obstacles; better guidance on how and when to employ the semi-automatic recovery methods also plays an important role. An approach to tackle these obstacles as well as preliminary findings are described. © 2009 {IEEE.}},
booktitle = {Proceedings of the International Workshop on Traceability in Emerging Forms of Software Engineering},
author = {Leuser, J.},
year = {2009},
keywords = {primary},
pages = {31--35}
},
@inproceedings{leuser_tackling_2010,
title = {Tackling semi-automatic trace recovery for large specifications},
abstract = {[Context and motivation] Traceability is not as well established in the automobile industry as it is for instance in avionics. However, new standards require specifications to contain traces. Manually creating and maintaining traceability in large specifications is cumbersome and expensive. [Question/problem] This work investigates whether it is possible to semi-automatically recover traceability within natural language specifications (e.g. requirement and test specifications) using information retrieval algorithms. More specifically, this work deals with large, German specifications from the automobile industry. [Principal ideas/results] Using optimized algorithms, we are able to retrieve most of the traces. The remaining problem is the reduction of false-positive candidate traces. [Contribution] We identified optimizations that improve the retrieval quality: Use of meta-data, filtering of redundant texts, use of domain language, and dynamic identification of signals. © 2010 Springer-Verlag.},
booktitle = {Requirements Engineering: Foundation for Software Quality},
author = {Leuser, J. and Ott, D.},
year = {2010},
keywords = {primary},
pages = {203--217}
},
@inproceedings{li_requirement-centric_2008,
title = {Requirement-centric traceability for change impact analysis: A case study},
abstract = {Requirement change occurs during the entire software lifecycle, which is not only inevitable but also necessary. However, uncontrolled requirement change will lead to a huge waste of time and effort. Most studies about change impact analysis assume changes take place in code, which results in analysis only at the source code level and ignores that requirement change is the fundamental cause. This paper introduces a Requirement Centric Traceability ({RCT)} approach to analyze the change impact at the requirement level. The {RCT} combines the requirement interdependency graph and dynamic requirement traceability to identify the potential impact of requirement change on the entire system in the late phase. This approach has been successfully applied to a real-life project, and the benefits and lessons learned will also be discussed.},
booktitle = {International Conference on Software Process},
author = {Li, Y. and Li, J. and Yang, Y. and Li, M.},
year = {2008},
keywords = {links2code, primary},
pages = {100--111}
},
@inproceedings{lormans_monitoring_2006,
title = {Monitoring requirements coverage using reconstructed views: An industrial case study},
abstract = {Requirements views, such as coverage and status views, are an important asset for monitoring and managing software development. We have developed a method that automates the process for reconstructing these views, and built a tool, {ReqAnalyst}, to support this method. In this paper, we investigate to what extent we can automatically generate requirements views to monitor requirements in test categories and test cases. The technique used for retrieving the necessary data is an information retrieval technique called Latent Semantic Indexing ({LSI).} We applied our method in a case study at {LogicaCMG.} We defined a number of requirements views and experimented with different reconstruction settings to generate these views.},
booktitle = {Proceedings of the 13th Working Conference on Reverse Engineering},
author = {Lormans, M. and Gross, H-G. and van Deursen, A. and van Solingen, R. and Stehouwer, A.},
year = {2006},
keywords = {primary},
pages = {275--284}
},
@inproceedings{lormans_can_2006,
title = {Can {LSI} help reconstructing requirements traceability in design and test?},
abstract = {Managing traceability data is an important aspect of the software development process. In this paper we investigate to what extent latent semantic indexing ({LSI)}, an information retrieval technique, can help recovering the information needed for automatically reconstructing traceability during the development process. We experimented with two different link selection strategies and applied {LSI} in multiple case studies varying in size and context. We discuss the results of a small lab study, a larger case study and a large industrial case study.},
booktitle = {Proceedings of the 10th European Conference on Software Maintenance and Reengineering},
author = {Lormans, M. and van Deursen, A.},
year = {2006},
keywords = {primary},
pages = {45--54}
},
@article{lormans_industrial_2008,
title = {An industrial case study in reconstructing requirements views},
volume = {13},
abstract = {Requirements views, such as coverage and status views, are an important asset for monitoring and managing software development projects. We have developed a method that automates the process of reconstructing these views, and we have built a tool, {ReqAnalyst}, that supports this method. This paper presents an investigation of the extent to which requirements views can be automatically generated in order to monitor requirements in industrial practice. The paper focuses on monitoring the requirements in test categories and test cases. In order to retrieve the necessary data, an information retrieval technique, called Latent Semantic Indexing, was used. The method was applied in an industrial study. A number of requirements views were defined and experiments were carried out with different reconstruction settings for generating these views. Finally, we explored how these views can help the developers during the software development process.},
number = {6},
journal = {Empirical Software Engineering},
author = {Lormans, M. and van Deursen, A. and Gross, H-G.},
year = {2008},
keywords = {primary},
pages = {727--760}
},
@inproceedings{lucia_can_2006,
address = {Los Alamitos, {CA}, {USA}},
title = {Can information retrieval techniques effectively support traceability link recovery?},
abstract = {Applying information retrieval ({IR)} techniques to retrieve all correct links between software artefacts is in general impractical, as usually this means producing a high effort for discarding too many false positives. We show that the only way to recover traceability links using {IR} methods is to identify an "optimal" threshold that achieves an acceptable balance between traced links and false positives. Unfortunately, such a threshold is not known a priori. For this reason we devised an incremental traceability recovery approach to gradually identify the threshold where it is more convenient to stop the traceability recovery process, and provide evidence of this in a case study. We also report the experience of using incremental traceability recovery during the development of software projects.},
booktitle = {14th {IEEE} International Conference on Program Comprehension},
author = {De Lucia, A. and Fasano, F. and Oliveto, R. and Tortora, G.},
year = {2006},
keywords = {primary},
pages = {10 pp.}
},
@inproceedings{mahmoud_using_2010,
title = {Using semantics-enabled information retrieval in requirements tracing: An ongoing experimental investigation},
abstract = {Requirements tracing is a central activity for software systems quality management. However, in large-scale evolving systems, maintaining traceability information manually can become a tedious task. To address this problem, several dynamic techniques were introduced to provide automatic traceability links generation. These techniques are usually based on information retrieval ({IR)} methods which link different artifacts based on their syntactic information. This paper reports an ongoing experimental investigation of using semantics-enabled {IR} methods to generate traceability links. Our goal is to explore dynamic, accurate, and conceptually rich ways to generate and maintain traceability information. © 2010 {IEEE.}},
booktitle = {Proceedings of the International Computer Software and Applications Conference},
author = {Mahmoud, A. and Niu, N.},
year = {2010},
keywords = {primary},
pages = {246--247}
},
@inproceedings{mahmoud_source_2011,
title = {Source code indexing for automated tracing},
abstract = {Requirements-to-source-code traceability employs information retrieval ({IR)} methods to automatically link requirements to the source code that implements them. A crucial step in this process is indexing, where partial and important information from the software artifacts is converted into a representation that is compatible with the underlying {IR} model. Source code demands special attention in the indexing process. In this paper, we investigate source code indexing for supporting automatic traceability. We introduce a feature diagram that captures the key components and their relationships in the domain of source code indexing. We then present an experiment to examine the features of the diagram and their dependencies. Results show that utilizing comments has a significant effect on traceability link generation, and stemming is required when comments are considered.},
booktitle = {Proceedings of the 6th International Workshop on Traceability in Emerging Forms of Software Engineering},
author = {Mahmoud, A. and Niu, N.},
year = {2011},
keywords = {primary},
pages = {3--9},
},
@article{marcus_recovery_2005,
title = {Recovery of traceability links between software documentation and source code},
volume = {15},
abstract = {An approach for the semi-automated recovery of traceability links between software documentation and source code is presented. The methodology is based on the application of information retrieval techniques to extract and analyze the semantic information from the source code and associated documentation. A semi-automatic process is defined based on the proposed methodology. The paper advocates the use of latent semantic indexing ({LSI)} as the supporting information retrieval technique. Two case studies using existing software are presented comparing this approach with others. The case studies show positive results for the proposed approach, especially considering the flexibility of the methods used. © World Scientific Publishing Company.},
number = {5},
journal = {International Journal of Software Engineering and Knowledge Engineering},
author = {Marcus, A. and Maletic, J. and Sergeyev, A.},
year = {2005},
keywords = {links2code, primary},
pages = {811--836}
},
@inproceedings{mcmillan_combining_2009,
title = {Combining Textual and Structural Analysis of Software Artifacts for Traceability Link Recovery},
abstract = {Existing methods for recovering traceability links among software documentation artifacts analyze textual similarities among these artifacts. It may be the case, however, that related documentation elements share little terminology or phrasing. This paper presents a technique for indirectly recovering these traceability links in requirements documentation by combining textual with structural information as we conjecture that related requirements share related source code elements. A preliminary case study indicates that our combined approach improves the precision and recall of recovering relevant links among documents as compared to stand-alone methods based solely on analyzing textual similarities.},
booktitle = {Proceedings of the International Workshop on Traceability in Emerging Forms of Software Engineering},
author = {{McMillan}, C. and Poshyvanyk, D. and Revelle, M.},
year = {2009},
keywords = {links2code, primary},
pages = {41--48}
},
@inproceedings{natt_och_dag_speeding_2004,
title = {Speeding up Requirements Management in a Product Software Company: Linking Customer Wishes to Product Requirements through Linguistic Engineering},
shorttitle = {Speeding up Requirements Management in a Product Software Company},
abstract = {Developing large complex software products aimed for a broad market involves a great flow of wishes and requirements. The former are elicited from customers while the latter are brought forth by the developing organization. These are preferably kept separated to preserve the different perspectives. The interrelationships should however be identified and maintained to enable well-founded decisions. Unfortunately, the current manual linkage is cumbersome, time-consuming, and error-prone. This work presents a pragmatic linguistic engineering approach to how statistical natural language processing may be used to support the manual linkage between customer wishes and product requirements by suggesting potential links. An evaluation with real requirements from industry is presented. It shows that in a realistic setting, automatic support could make linkage faster for at least 50\% of the links. An estimation based on our evaluation also shows that considerable time savings are possible. The results, together with the identified enhancement, are promising for improving software quality and saving time in industrial requirements engineering.},
booktitle = {Proceedings of the 12th International Requirements Engineering Conference},
author = {{Natt och Dag}, J. and Gervasi, V. and Brinkkemper, S. and Regnell, B.},
year = {2004},
keywords = {primary},
pages = {283--294}
},
@article{natt_och_dag_feasibility_2002,
title = {A Feasibility Study of Automated Natural Language Requirements Analysis in Market-Driven Development},
volume = {7},
abstract = {In market-driven software development there is a strong need for support to handle congestion in the requirements engineering process, which may occur as the demand for short time-to-market is combined with a rapid arrival of new requirements from many different sources. Automated analysis of the continuous flow of incoming requirements provides an opportunity to increase the efficiency of the requirements engineering process. This paper presents empirical evaluations of the benefit of automated similarity analysis of textual requirements, where existing information retrieval techniques are used to statistically measure requirements similarity. The results show that automated analysis of similarity among textual requirements is a promising technique that may provide effective support in identifying relationships between requirements.},
number = {1},
journal = {Requirements Engineering},
author = {{Natt och Dag}, J. and Regnell, B. and Carlshamre, P. and Andersson, M. and Karlsson, J.},
year = {2002},
keywords = {primary},
pages = {20--33}
},
@article{natt_och_dag_experiment_2006,
title = {An experiment on linguistic tool support for consolidation of requirements from multiple sources in market-driven product development},
volume = {11},
abstract = {This paper presents an experiment with a linguistic support tool for consolidation of requirements sets. The experiment is designed based on the requirements management process at a large market-driven software development company that develops generic solutions to satisfy many different customers. New requirements and requests for information are continuously issued, which must be analyzed and responded to. The new requirements should first be consolidated with the old to avoid reanalysis of previously elicited requirements and to complement existing requirements with new information. In the presented experiment, a new open-source tool is evaluated in a laboratory setting. The tool uses linguistic engineering techniques to calculate similarities between requirements and presents a ranked list of suggested similar requirements, between which links may be assigned. It is hypothesized that the proposed technique for finding and linking similar requirements makes the consolidation more efficient. The results show that subjects that are given the support provided by the tool are significantly more efficient and more correct in consolidating two requirements sets, than are subjects that do not get the support. The results suggest that the proposed techniques may give valuable support and save time in an industrial requirements consolidation process.},
number = {2},
journal = {Empirical Software Engineering},
author = {{Natt och Dag}, J. and Thelin, T. and Regnell, B.},
year = {2006},
keywords = {primary},
pages = {303--329}
},
@inproceedings{oliveto_equivalence_2010,
title = {On the equivalence of information retrieval methods for automated traceability link recovery},
abstract = {We present an empirical study to statistically analyze the equivalence of several traceability recovery methods based on Information Retrieval ({IR)} techniques. The analysis is based on Principal Component Analysis and on the analysis of the overlap of the set of candidate links provided by each method. The studied techniques are the Jensen-Shannon ({JS)} method, Vector Space Model ({VSM)}, Latent Semantic Indexing ({LSI)}, and Latent Dirichlet Allocation ({LDA).} The results show that while {JS}, {VSM}, and {LSI} are almost equivalent, {LDA} is able to capture a dimension unique to the set of techniques which we considered. © 2010 {IEEE.}},
booktitle = {International Conference on Program Comprehension},
author = {Oliveto, R. and Gethers, M. and Poshyvanyk, D. and De Lucia, A.},
year = {2010},
keywords = {links2code, primary},
pages = {68--71}
},
@article{park_implementation_2000,
title = {Implementation of an Efficient Requirements Analysis Supporting System Using Similarity Measure Techniques},
volume = {42},
abstract = {As software becomes larger and more complicated, requirements analysis becomes an important and difficult activity for software engineers. This paper proposes a requirements analysis supporting system that supports informal requirements analysis. The proposed system measures the similarity between requirement sentences to identify possible redundancies and inconsistencies, and extracts possibly ambiguous requirements. The similarity measurement method combines a sliding window model and a parser model. Using these methods, the proposed system supports tracing dependencies between documents and improves the quality of requirement sentences. The efficiency of the proposed system and a process for requirement specification analysis using the system are presented.},
number = {6},
journal = {Information and Software Technology},
author = {Park, S. and Kim, H. and Ko, Y. and Seo, J.},
year = {2000},
keywords = {primary},
pages = {429--438}
},
@inproceedings{parvathy_comparative_2008,
title = {A comparative study of document correlation techniques for traceability analysis},
abstract = {One of the important aspects of software engineering is to ensure traceability across the development lifecycle. Traceability matrix is widely used to check for completeness and to aid impact analysis. We propose that this computation of traceability can be automated by looking at the correlation between the documents. This paper describes and compares four novel approaches for traceability computation based on text similarity, term structure and inter-document correlation algorithms. These algorithms base themselves on different information retrieval techniques for establishing document correlation. Observations from our experiments are also presented. The advantages and disadvantages of each of these approaches are discussed in detail. Various scenarios where these approaches would be applicable and the future course of action are also discussed.},
booktitle = {Proceedings of the 10th International Conference on Enterprise Information Systems, Information Systems Analysis and Specification},
author = {Parvathy, A. G. and Vasudevan, B. G. and Balakrishnan, R.},
year = {2008},
keywords = {primary},
pages = {64--69}
},
@inproceedings{port_experiences_2011,
title = {Experiences with text mining large collections of unstructured systems development artifacts at {JPL}},
abstract = {Often repositories of systems engineering artifacts at {NASA's} Jet Propulsion Laboratory ({JPL)} are so large and poorly structured that they have outgrown our capability to effectively manually process their contents to extract useful information. Sophisticated text mining methods and tools seem a quick, low-effort approach to automating our limited manual efforts. Our experiences of exploring such methods mainly in three areas including historical risk analysis, defect identification based on requirements analysis, and over-time analysis of system anomalies at {JPL}, have shown that obtaining useful results requires substantial unanticipated efforts - from preprocessing the data to transforming the output for practical applications. We have not observed any quick 'wins' or realized benefit from short-term effort avoidance through automation in this area. Surprisingly we have realized a number of unexpected long-term benefits from the process of applying text mining to our repositories. This paper elaborates some of these benefits and our important lessons learned from the process of preparing and applying text mining to large unstructured system artifacts at {JPL}, aiming to benefit future {TM} applications in similar problem domains and in the hope of being extended to broader areas of application.},
language = {English},
booktitle = {Proceedings of the 33rd International Conference on Software Engineering},
author = {Port, D. and Nikora, A. and Hihn, J. and Huang, L.},
year = {2011},
keywords = {primary},
pages = {701--710},
},
@inproceedings{settimi_supporting_2004,
title = {Supporting software evolution through dynamically retrieving traces to {UML} artifacts},
abstract = {The ability to trace new and changed requirements to their impacted components provides critical support for managing change in an evolving software system. Unfortunately numerous studies have shown the difficulties of maintaining links using traditional traceability methods. Information retrieval techniques can be used to dynamically generate traces and alleviate the need to maintain explicit links, however prior work in this area has focused primarily on establishing intra-requirement links or links between requirements and code. We compare several retrieval techniques for generating links between requirements, code, and {UML} models. Tracing to {UML} elements provides a higher perspective on the proposed change than would be possible if links were generated directly to the code and supports the growing trend towards model driven development. Our experiment returned better results for establishing links to {UML} artifacts than to code, suggesting the usefulness of establishing links to code via {UML} artifacts. We conclude the paper by discussing the implications of this approach for managing the evolution of a software system.},
booktitle = {Proceedings of the 7th International Workshop on Principles of Software Evolution},
author = {Settimi, R. and Cleland-Huang, J. and Ben Khadra, O. and Mody, J. and Lukasik, W. and {DePalma}, C.},
year = {2004},
keywords = {primary},
pages = {49--54}
},
@inproceedings{sultanov_application_2010,
title = {Application of swarm techniques to requirements engineering: Requirements tracing},
abstract = {We posit that swarm intelligence can be applied to effectively address requirements engineering problems. Specifically, this paper demonstrates the applicability of swarm intelligence to the requirements tracing problem using a simple ant colony algorithm. The technique has been validated using two real-world datasets from two problem domains. The technique can generate requirements traceability matrices ({RTMs)} between textual requirements artifacts (high level requirements traced to low level requirements, for example) with equivalent or better accuracy than traditional information retrieval techniques. © 2010 {IEEE.}},
booktitle = {Proceedings of the 18th International Requirements Engineering Conference},
author = {Sultanov, H. and Huffman Hayes, J.},
year = {2010},
keywords = {primary},
pages = {211--220}
},
@inproceedings{sundaram_baselines_2005,
title = {Baselines in requirements tracing},
abstract = {We summarize the results of our requirements tracing work to date, focusing on our empirical results with open source datasets. Specifically, we describe the problem of after-the-fact requirements tracing for Verification and Validation ({V\&V)} analysts, we provide a brief overview of Information Retrieval methods we have applied as well as measures used to evaluate them, we describe our tracing tool, and we present the results of a number of empirical studies. Two of the open source datasets that we have used are available to the research community at {http://promise.site.uottawa.ca/SERepository/.}},
booktitle = {Proceedings of the Workshop on Predictor Models in Software Engineering},
author = {Sundaram, S. and Huffman Hayes, J. and Dekhtyar, A.},
year = {2005},
keywords = {primary},
pages = {1--6},
},
@article{sundaram_assessing_2010,
title = {Assessing traceability of software engineering artifacts},
volume = {15},
abstract = {The generation of traceability links or traceability matrices is vital to many software engineering activities. It is also person-power intensive, time-consuming, error-prone, and lacks tool support. The activities that require traceability information include, but are not limited to, risk analysis, impact analysis, criticality assessment, test coverage analysis, and verification and validation of software systems. Information Retrieval ({IR)} techniques have been shown to assist with the automated generation of traceability links by reducing the time it takes to generate the traceability mapping. Researchers have applied techniques such as Latent Semantic Indexing ({LSI)}, vector space retrieval, and probabilistic {IR} and have enjoyed some success. This paper concentrates on examining issues not previously widely studied in the context of traceability: the importance of the vocabulary base used for tracing and the evaluation and assessment of traceability mappings and methods using secondary measures. We examine these areas and perform empirical studies to understand the importance of each to the traceability of software engineering artifacts. © 2010 Springer-Verlag London Limited.},
number = {3},
journal = {Requirements Engineering},
author = {Sundaram, S. and Huffman Hayes, J. and Dekhtyar, A. and Holbrook, A.},
year = {2010},
keywords = {primary},
pages = {313--335}
},
@article{wang_recovering_2009,
title = {Recovering Relationships between Documentation and Source Code based on the Characteristics of Software Engineering},
volume = {243},
abstract = {Software documentation is usually expressed in natural languages, which contains much useful information. Therefore, establishing the traceability links between documentation and source code can be very helpful for software engineering management, such as requirement traceability, impact analysis, and software reuse. Currently, the recovery of traceability links is mostly based on information retrieval techniques, for instance, probabilistic model, vector space model, and latent semantic indexing. Previous work treats both documentation and source code as plain text files, but the quality of retrieved links can be improved by exploiting the fact that they are software engineering documents. In this paper, we present four enhanced strategies to improve the traditional {LSI} method based on the special characteristics of documentation and source code, namely, source code clustering, identifier classifying, similarity thesaurus, and hierarchical structure enhancement. Experimental results show that the first three enhanced strategies can increase the precision of retrieved links by 5\% to 16\%, while the fourth strategy increases it by about 13\%. © 2009 Elsevier {B.V.} All rights reserved.},
journal = {Electronic Notes in Theoretical Computer Science},
author = {Wang, X. and Lai, G. and Liu, C.},
year = {2009},
keywords = {links2code, primary},
pages = {121--137}
},
@inproceedings{winkler_trace_2009,
title = {Trace retrieval for evolving artifacts},
abstract = {{IR-based} trace retrieval is a method to derive traceability links using information retrieval ({IR)} algorithms. So far, this method has been applied only to static artifacts. In this paper, we address {IR-based} trace retrieval applied to evolving artifacts. We describe our enhancements to existing algorithms and present first promising results by applying the enhanced algorithm to two different data sets. © 2009 {IEEE.}},
booktitle = {Proceedings of the International Workshop on Traceability in Emerging Forms of Software Engineering},
author = {Winkler, S.},
year = {2009},
keywords = {primary},
pages = {49--56}
},
@article{yadla_tracing_2005,
title = {Tracing requirements to defect reports: an application of information retrieval techniques},
volume = {1},
shorttitle = {Tracing requirements to defect reports},
abstract = {To support debugging, maintenance, verification and validation ({V\&V)} and/or independent {V\&V} ({IV\&V)}, it is necessary to understand the relationship between defect reports and their related artifacts. For example, one cannot correct a code-related defect report without being able to find the code that is affected. Information retrieval ({IR)} techniques have been used effectively to trace textual artifacts to each other. This has generally been applied to the problem of dynamically generating a trace between artifacts in the software document hierarchy after the fact (after development has proceeded to at least the next lifecycle phase). The same techniques can also be used to trace textual artifacts of the software engineering lifecycle to defect reports. We have applied the term frequency–inverse document frequency ({TF-IDF)} technique with relevance feedback, as implemented in our requirements tracing on-target ({RETRO)} tool, to the problem of tracing textual requirement elements to related textual defect reports. We have evaluated the technique using a dataset for a {NASA} scientific instrument. We found that recall of over 85\% and precision of 69\%, and recall of 70\% and precision of 99\% could be achieved, respectively, on two subsets of the dataset.},
journal = {Innovations in Systems and Software Engineering},
author = {Yadla, S. and Huffman Hayes, J. and Dekhtyar, A.},
year = {2005},
keywords = {primary},
pages = {116--124},
file = {"Tracing Requirements to Defect Reports: " by Suresh Yadla, et al.:C:\Users\Markus\AppData\Roaming\Mozilla\Firefox\Profiles\cibtu6ix.default\zotero\storage\CKNUAC2N\20.html:text/html}
},
@inproceedings{zhao_understanding_2003,
title = {Understanding how the requirements are implemented in source code},
abstract = {For software maintenance and evolution, a common problem is to understand how each requirement is implemented in the source code. The basic solution of this problem is to find the fragment of source code that is corresponding to the implementation of each requirement. This can be viewed as a requirement-slicing problem - slicing the source code according to each individual requirement. We present an approach to find the set of functions that is corresponding to each requirement. The main idea of our method is to combine the information retrieval technology with the static analysis of source code structures. First, we retrieve the initial function sets through some information retrieval model using functional requirements as the queries and identifier information (such as function names, parameter names, variable names etc.) of functions in the source code as target documents. Then we complement each retrieved initial function set by analyzing the call graph extracted from the source code. A premise of our approach is that programmers should use meaningful names as identifiers. Furthermore, we perform an experimental study based on a {GNU} system. We use two basic metrics: precision and recall (which are the common practice in the information retrieval field), to evaluate our approach. We also compare the results directly acquired from information retrieval with those that are complemented through static source code structure analysis.},
booktitle = {Proceedings of the Asia-Pacific Software Engineering Conference},
author = {Zhao, W. and Zhang, L. and Liu, Y. and Luo, J. and Sun, J. S.},
year = {2003},
keywords = {links2code, primary},
pages = {68--77}
},
@inproceedings{zhou_clustering-based_2007,
title = {A clustering-based approach for tracing object-oriented design to requirement},
abstract = {Capturing the traceability relationship between software requirement and design allows developers to check whether the design meets the requirement and to analyze the impact of requirement changes on the design. This paper presents an approach for identifying the classes in object-oriented software design that realize a given use case, which leverages ideas and technologies from the Information Retrieval ({IR)} and Text Clustering areas. First, we represent the use case and all classes as vectors in a vector space constructed with the keywords coming from them. Then, the classes are clustered based on their semantic relevance and the cluster most related to the use case is identified. Finally, we supplement the raw cluster by analyzing structural relationships among classes. We conduct an experiment applying this clustering-based approach to a system, Resource Management Software. We calculate and compare the precision and recall of our approach and non-clustering approaches, and get promising results.},
booktitle = {Proceedings of the 10th International Conference on Fundamental Approaches to Software Engineering},
author = {Zhou, X. and Yu, H.},
year = {2007},
keywords = {links2code, primary},
pages = {412--422}
},
@inproceedings{zou_phrasing_2006,
title = {Phrasing in dynamic requirements trace retrieval},
abstract = {Dynamic trace retrieval provides an alternate option to traditional traceability methods such as matrices, hyperlinks, and manual link construction. Instead of relying upon manually constructed and maintained traces, links are generated dynamically on an 'as-needed' basis using information retrieval techniques. Prior work in this area has indicated that in order to retrieve between 90\% and 95\% of the correct traces, only low precision levels can be obtained, which means that analysts must spend time filtering out unwanted links. This paper describes a method for improving the precision of trace results through incorporating the use of phrases detected and constructed from requirements using a part-of-speech tagger. A project glossary is also used to find additional phrases and weight the contributions of key phrases and terms. The approach is implemented in a probabilistic trace retrieval tool and evaluated through a series of experiments. The results show that phrasing can significantly increase the accuracy of the dynamic trace retrieval tool by generally increasing precision, and also by moving good trace links towards the top of the candidate links list. © 2006 {IEEE.}},
booktitle = {Proceedings of the International Computer Software and Applications Conference},
author = {Zou, X. and Settimi, R. and Cleland-Huang, J.},
year = {2006},
keywords = {exp, {GOLD}, links2code, primary, sysmap},
pages = {265--272},
},
@inproceedings{zou_evaluating_2008,
title = {Evaluating the Use of Project Glossaries in Automated Trace Retrieval},
abstract = {Automated traceability methods use information retrieval techniques to dynamically generate traceability links, however they suffer from precision problems. This paper extends our previous work in using a project glossary to improve trace results and presents criteria for evaluating whether an existing project glossary can be used to enhance results in a given project. A new approach for automatically extracting a set of important terms and phrases is also described. Our experimental results suggest that these terms and phrases can be used effectively in lieu of a project glossary to help improve precision of the retrieved traces.},
booktitle = {Proceedings of the International Conference on Software Engineering Research and Practice},
author = {Zou, X. and Settimi, R. and Cleland-Huang, J.},
year = {2008},
keywords = {primary},
pages = {157--163}
},
@article{zou_improving_2010,
title = {Improving automated requirements trace retrieval: A study of term-based enhancement methods},
volume = {15},
abstract = {Automated requirements traceability methods that utilize Information Retrieval ({IR)} methods to generate and maintain traceability links are often more efficient than traditional manual approaches, however the traces they generate are imprecise and significant human effort is needed to evaluate and filter the results. This paper investigates and compares three term-based enhancement methods that are designed to improve the performance of a probabilistic automated tracing tool. Empirical studies show that the enhancement methods can be effective in increasing the accuracy of the retrieved traces; however the effectiveness of each method varies according to specific project characteristics. The analysis of such characteristics has led to the development of two new project-level metrics which can be used to predict the effectiveness of each enhancement method for a given data set. A procedure to automatically extract critical keywords and phrases from a set of traceable artifacts is also presented to enhance the automated trace retrieval algorithm. The procedure is tested on two new datasets. © Springer Science + Business Media, {LLC} 2009.},
number = {2},
journal = {Empirical Software Engineering},
author = {Zou, X. and Settimi, R. and Cleland-Huang, J.},
year = {2010},
keywords = {primary},
pages = {119--146}
}