%% This BibTeX bibliography file was created using BibDesk.
%% http://www.cs.ucsd.edu/~mmccrack/bibdesk.html

%% Created for David at 2008-06-24 15:49:30 +0100

%% Saved with string encoding Western (ASCII)

%% Notes for maintainers:
%%  - One entry per publication, one field per line; citation keys must stay stable.
%%  - Pubtype, Date-Added, Date-Modified and Bdsk-Url-1 are not standard BibTeX
%%    fields; standard styles ignore them.
%%  - Pubtype values used in this file: thesis, conference, refereed, refereed-ws,
%%    other-ws, other.
%%  - Page ranges use a double hyphen; words that must keep their case in titles
%%    are wrapped in braces as whole words.

@phdthesis{reitter2008thesis,
  Abstract = {This thesis addresses the cognitive basis of syntactic adaptation, which biases speakers to repeat their own syntactic constructions and those of their conversational partners. I address two types of syntactic adaptation: short-term priming and long-term adaptation. I develop two metrics for syntactic adaptation within a speaker and between speakers in dialogue: one for short-term priming effects that decay quickly, and one for long-term adaptation over the course of a dialogue. Both methods estimate adaptation in large datasets consisting of transcribed human-human dialogue annotated with syntactic information. Two such corpora in English are used: Switchboard, a collection of spontaneous phone conversation, and HCRC Map Task, a set of task-oriented dialogues in which participants describe routes on a map to each other. I find both priming and long-term adaptation in both corpora, confirming well-known experimental results (Bock, 1986). I extend prior work by showing that syntactic priming effects not only apply to selected syntactic constructions that are alternative realizations of the same semantics, but still hold when a broad variety of syntactic phrase-structure rules are considered. Each rule represents a cognitive decision during syntactic processing. I show that the priming effect for a rule is inversely proportional to its frequency. With this methodology, I test predictions of the Interactive Alignment Model (Pickering \& Garrod, 2004). The IAM claims that linguistic and situation-model agreement between interlocutors in dialogue is the result of a cascade of resource-free, mechanistic priming effects on various linguistic levels. I examine task-oriented dialogue in Map Task, which provides a measure of task success through the deviance of the communicated routes on the maps. I find that long-term syntactic adaptation predicts communicative success, and it does so earlier than lexical adaptation. The result is applied in a machine-learning based model that estimates task success based on the dialogue, capturing 14 percent of the variance in Map Task. Short-term syntactic priming differs qualitatively from long-term adaptation, as it does not predict task success, providing evidence against learning as a single cognitive basis of adaptation effects. I obtain further evidence for the correlation between semantic activity and syntactic priming through a comparison of the Map Task and Switchboard corpora, showing that short-term priming is stronger in task-oriented dialogue than in spontaneous conversation. This difference is evident for priming between and within speakers, which suggests that priming is a mechanistic rather than strategic effect. I turn to an investigation of the level at which syntactic priming influences language production. I establish that the effect applies to structural syntactic decisions as opposed to all surface sequences of lexical categories. To do so, I identify pairs of part-of-speech categories which consistently cross constituent boundaries defined by the phrase-structure analyses of the sentences. I show that such distituents are less sensitive to priming than pairs occurring within constituents. Thus, syntactic priming is sensitive to syntactic structure. The notion of constituent structure differs among syntactic models. Combinatory Categorial Grammar (CCG, Steedman, 2000) formalizes flexible constituent structure, accounting a varying degree of incrementality in syntactic sentence planning. I examine whether priming effects can support the predictions of CCG using the Switchboard corpus, which has been annotated with CCG syntax. I confirm the syntactic priming effect for lexical and non-lexical CCG categories, which encode partially satisfied subcategorization frames. I then show that both incremental and normal-form constituent structures exhibit priming, arguing for language production accounts that support flexible incrementality. The empirical results are reflected in a cognitive model of syntactic realization in language production. The model assumes that language production is subject to the same principles and constraints as any other form of cognition and follows the ACT-R framework (Anderson, 2004). Its syntactic process implements my empirical results on priming and is based on CCG. Syntactic planning can take place incrementally and non-incrementally. The model is able to generate simple sentences that vary syntactically, similar to the materials used in the experimental priming literature. Syntactic adaptation emerges due to a preferential and sped-up memory retrieval of syntactic categories describing linearization and subcategorization requirements. Long-term adaptation is explained as a form of learning, while short-term priming is the result of a combination of learning and spreading activation from semantic and lexical material. Simulations show that the model produces the adaptation effects and their inverse frequency interaction, as well as cumulativity of long-term adaptation.},
  Author = {David Reitter},
  Date-Added = {2008-03-28 23:01:40 +0000},
  Date-Modified = {2008-06-24 15:49:30 +0100},
  Pubtype = {thesis},
  School = {University of Edinburgh},
  Title = {Context Effects in Language Production: Models of Syntactic Priming in Dialogue Corpora},
  Year = {2008},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter2008phd.pdf},
}

@misc{reitter2006rule,
  Address = {Nijmegen, Netherlands},
  Author = {David Reitter and Johanna D. Moore and Frank Keller},
  Date-Added = {2008-03-28 22:52:45 +0000},
  Date-Modified = {2008-03-28 23:20:52 +0000},
  Howpublished = {Talk at the 12th Annual Conference on Architectures and Mechanisms for Language Processing},
  Pubtype = {conference},
  Title = {Corpus-based Evidence for Syntactic Priming as Rule Repetition},
  Year = {2006},
}

@misc{reitter2007sequence,
  Address = {La Jolla, CA},
  Author = {David Reitter and Frank Keller and Julia Hockenmaier},
  Date-Added = {2008-03-28 22:51:20 +0000},
  Date-Modified = {2008-03-28 23:20:31 +0000},
  Howpublished = {Poster at 20th Annual CUNY Conference on Human Sentence Processing},
  Pubtype = {conference},
  Title = {Corpus-based evidence against sequence priming},
  Year = {2007},
}

@misc{reitter2007successful,
  Address = {La Jolla, CA},
  Author = {David Reitter and Johanna D. Moore},
  Date-Added = {2008-03-28 22:50:18 +0000},
  Date-Modified = {2008-03-28 23:20:21 +0000},
  Howpublished = {Poster at 20th Annual CUNY Conference on Human Sentence Processing},
  Pubtype = {conference},
  Title = {Successful dialogue requires syntactic alignment},
  Year = {2007},
}

@misc{reitter2008the-repetition,
  Address = {Chapel Hill, NC},
  Author = {David Reitter},
  Date-Added = {2008-03-28 22:49:40 +0000},
  Date-Modified = {2008-03-28 23:19:53 +0000},
  Howpublished = {Poster at 21st Annual CUNY Conference on Human Sentence Processing},
  Pubtype = {conference},
  Title = {The repetition of general lexical material boosts structural priming in language production},
  Year = {2008},
}

@misc{reitter2008structural,
  Address = {Chapel Hill, NC},
  Author = {David Reitter and Frank Keller},
  Date-Added = {2008-03-28 22:45:36 +0000},
  Date-Modified = {2008-03-28 23:20:11 +0000},
  Howpublished = {Talk at 21st Annual CUNY Conference on Human Sentence Processing},
  Pubtype = {conference},
  Title = {Structural priming in language production as the result of learning and spreading activation in an {ACT-R} model},
  Year = {2008},
}

@inproceedings{reitter2006methods,
  Author = {David Reitter and Charles Callaway},
  Booktitle = {Open Mic Session, Fourth International Natural Language Generation Conference},
  Date-Added = {2007-11-05 18:43:02 +0000},
  Date-Modified = {2008-04-07 12:46:38 +0100},
  Pubtype = {conference},
  Title = {Methods, Requirements and Licenses for Shared {NLG} Resources},
  Year = {2006},
}

@inproceedings{reitter2007against,
  Abstract = {Structural priming, i.e., the tendency to repeat linguistic material, can be explained by two alternative representational assumptions: either as the repetition of hierarchical representations generated by syntactic rules, or as the repetition of lexical sequences. We present two studies that test these explanations by investigating priming effects in a dialogue corpus. We compare syntactic constituents with distituents, i.e., part-of-speech pairs that cross constituent boundaries. We find a reliable short-term priming effect for constituents, but no priming for distituents. This result supports the rule-based view of priming, which does not predict priming of distituents. The data are incompatible with a sequence priming analysis, which cannot distinguish between constituents and distituents. In a second corpus study, we study long-term priming and find priming effects for both constituents and distituents. This indicates that the mechanism underlying long-term adaptation differs substantially from short-term priming.},
  Address = {Nashville, TN},
  Author = {David Reitter and Frank Keller},
  Booktitle = {Proceedings of the 29th Annual Conference of the Cognitive Science Society (CogSci)},
  Date-Added = {2007-04-12 16:29:44 +0100},
  Date-Modified = {2008-04-07 12:45:53 +0100},
  Pages = {1421--1426},
  Pubtype = {refereed-ws},
  Title = {Against Sequence Priming: Evidence from Constituents and Distituents in Corpus Data},
  Year = {2007},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter2007sequence_priming.pdf},
}

@inproceedings{reitter2007predicting,
  Abstract = {Task-solving in dialogue depends on the linguistic alignment of the interlocutors, which Pickering \& Garrod (2004) have suggested to be based on mechanistic repetition effects. In this paper, we seek confirmation of this hypothesis by looking at repetition in corpora, and whether repetition is correlated with task success. We show that the relevant repetition tendency is based on slow adaptation rather than short-term priming and demonstrate that lexical and syntactic repetition is a reliable predictor of task success given the first five minutes of a task-oriented dialogue.},
  Address = {Prague, Czech Republic},
  Author = {David Reitter and Johanna D. Moore},
  Booktitle = {Proceedings of the 45th Annual Meeting of the Association of Computational Linguistics (ACL)},
  Date-Added = {2007-03-23 15:35:51 +0000},
  Date-Modified = {2008-01-28 16:54:12 +0000},
  Pages = {808--815},
  Pubtype = {refereed},
  Title = {Predicting Success in Dialogue},
  Year = {2007},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter2007predicting_task_success.pdf},
}

@inproceedings{hachey2006dimensionality,
  Abstract = {A key task in an extraction system for query-oriented multi-document summarisation, necessary for computing relevance and redundancy, is modelling text semantics. In the Embra system, we use a representation derived from the singular value decomposition of a term co-occurrence matrix. We present methods to show the reliability of performance improvements. We find that Embra performs better with dimensionality reduction.},
  Address = {Sydney, Australia},
  Author = {Ben Hachey and Gabriel Murray and David Reitter},
  Booktitle = {Proceedings of the COLING-ACL Workshop Task-Focused Summarization and Question Answering 2006},
  Date-Added = {2006-05-23 11:04:02 +0100},
  Date-Modified = {2008-01-28 16:54:12 +0000},
  Pages = {1--7},
  Pubtype = {refereed-ws},
  Title = {Dimensionality Reduction Aids Term Co-Occurrence Based Multi-Document Summarization},
  Year = {2006},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/hachey2006dimensionality.pdf},
}

@inproceedings{dzikovska2006data-driven,
  Abstract = {We describe results from analyzing a corpus of human-human tutorial dialogue, which are aimed at identifying, formalizing and automating natural tutoring in the domain of calculus, in particular symbolic differentiation. We analyzed a corpus of human-human tutoring dialogues, focusing on three areas important to system development: interleaved symbolic and natural language, domain modeling, and tutorial dialogue strategies. We provide empirical verification of previous results on interleaving natural and symbolic language, and show that the properties of interleaving are highly dependent on the input modality. We describe a task model for our domain, and provide corpus data to show that the model must cover basic algebra skills as well, which are involved in differentiation. We verify the applicability of an existing annotation scheme for tutoring algebra in our domain, and propose that it be extended to cover student initiative.},
  Address = {Riva del Garda, Italy},
  Author = {Myroslava O. Dzikovska and David Reitter and Johanna D. Moore and Claus Zinn},
  Booktitle = {Proceedings of the ECAI Workshop on Language-Enhanced Educational Technology},
  Date-Added = {2006-05-19 12:46:06 +0100},
  Date-Modified = {2008-01-28 16:54:12 +0000},
  Editor = {Charles Callaway},
  Pages = {22--28},
  Pubtype = {refereed-ws},
  Title = {Data-driven Modelling of Human Tutoring in Calculus},
  Year = {2006},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/dzikovska2006lam.pdf},
}

@inproceedings{reitter2006ccg,
  Abstract = {This paper presents a corpus-based account of structural priming in human sentence processing, focusing on the role that syntactic representations play in such an account. We estimate the strength of structural priming effects from a corpus of spontaneous spoken dialogue, annotated syntactically with Combinatory Categorial Grammar (CCG) derivations. This methodology allows us to test a range of predictions that CCG makes about priming. In particular, we present evidence for priming between lexical and syntactic categories encoding partially satisfied subcategorization frames, and we show that priming effects exist both for incremental and normal-form CCG derivations.},
  Address = {Sydney, Australia},
  Author = {David Reitter and Julia Hockenmaier and Frank Keller},
  Booktitle = {Proceedings of the 2006 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  Date-Added = {2006-05-19 12:44:30 +0100},
  Date-Modified = {2008-03-27 08:26:31 +0000},
  Pages = {308--316},
  Pubtype = {refereed},
  Title = {Priming Effects in {Combinatory} {Categorial} {Grammar}},
  Year = {2006},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter2006ccg.pdf},
}

@inproceedings{reitter2006computational,
  Abstract = {Syntactic priming effects, modeled as increase in repetition probability shortly after a use of a syntactic rule, have the potential to improve language processing components. We model priming of syntactic rules in annotated corpora of spoken dialogue, extending previous work that was confined to selected constructions. We find that speakers are more receptive to priming from their interlocutor in task-oriented dialogue than in spontaneous conversation. Low-frequency rules are more likely to show priming.},
  Address = {New York, NY},
  Author = {David Reitter and Frank Keller and Johanna D. Moore},
  Booktitle = {Proceedings of Human Language Technology Conference/North American Chapter of the Association for Computational Linguistics (HLT/NAACL)},
  Date-Added = {2006-04-26 11:24:52 +0100},
  Date-Modified = {2008-02-19 16:11:22 +0000},
  Pages = {121--124},
  Pubtype = {refereed-ws},
  Title = {Computational Modeling of Structural Priming in Dialogue},
  Year = {2006},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter2006computational.pdf},
}

@inproceedings{reitter2006priming,
  Abstract = {Previous work provided corpus evidence for structural priming for specific syntactic constructions. The present paper extends these results by investigating priming effects involving arbitrary syntactic rules in spoken dialogue corpora. We demonstrate the existence of within- and between-speaker priming in both spontaneous conversation (the Switchboard corpus) and task-oriented dialogue (the Map Task corpus). We also find that between-speaker priming is stronger in the Map Task corpus. This supports the hypothesis that in task-oriented dialog, low-level priming is linked to higher-level alignment of situation models.},
  Address = {Vancouver, Canada},
  Author = {David Reitter and Johanna D. Moore and Frank Keller},
  Booktitle = {Proceedings of the 28th Annual Conference of the Cognitive Science Society (CogSci)},
  Date-Added = {2006-03-21 16:49:35 +0000},
  Date-Modified = {2008-04-07 12:46:10 +0100},
  Pages = {685--690},
  Pubtype = {refereed-ws},
  Title = {Priming of Syntactic Rules in Task-Oriented Dialogue and Spontaneous Conversation},
  Year = {2006},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter2006priming.pdf},
}

@inproceedings{hachey2005duc,
  Abstract = {We present the Embra system, a first-time entry to DUC for 2005 which performed at or above median for the manual assessment of responsiveness and on 4 out of 5 linguistic quality questions. The system takes a novel approach to relevance and redundancy, modeling sentence similarity using a latent semantic space constructed over a very large corpus. We present a simple approach to modeling specificity based on named entities which shows a small improvement over baseline. Finally, we discuss coherence and present a sentence reordering algorithm with a component-level evaluation demonstrating a positive effect.},
  Address = {Vancouver, Canada},
  Author = {Ben Hachey and Gabriel Murray and David Reitter},
  Booktitle = {Document Understanding Conference 2005},
  Date-Added = {2005-09-26 12:49:34 +0100},
  Date-Modified = {2008-01-28 16:54:12 +0000},
  Pubtype = {refereed-ws},
  Title = {The {Embra} System at {DUC} 2005: Query-oriented Multi-document Summarization with a Very Large Latent Semantic Space},
  Year = {2005},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/hachey2005embra.pdf},
}

@mastersthesis{reitter2004hybrid,
  Abstract = {The output of multimodal human-computer interfaces is what this thesis is concerned with. Rather than hard-coding graphical and spoken representations, methods are introduced that plan and realize coherent output, appropriate to the situation and the device. The generation system expects a mode- and language-independent representation, as it can be supplied by the dialogue management component of a dialogue system. The generator then assembles mode-specific rendering instructions simultaneously for each mode with the aid of a unification-based functional grammar. The approach proposed in this thesis abandons the canonical structure of pipelined planning and realization in natural language generation, in favor of hard constraints formulated in a grammar, and soft constraints that allow for the gradual adaptivity of the output. The grammar is constructed to ensure the coherence of output in different modalities, whose output is generated in a synchronized fashion rather than by separate, mode-specific generators. The soft constraints follow some of the Gricean maxims by incorporating two counteracting communicative goals: efficacy and efficiency. A fitness function encoding these goals takes into account situation- and user-specific factors, such as distractions in a single mode or the user's sensory impairments. The function leads to the selection of an appropriate output from the variety of potential outputs generated by the grammar. It is evaluated in a study with human subjects. The thesis presents a unification based, hybrid grammar formalism which can combine pre-fabricated phrases and linguistically motivated grammar fragments, and an associated algorithm which integrates the formulation of grammars that lead to cross-modally coherent output. Methods are compared to efficiently implement a control strategy, combining hard and soft constraints as a constraint optimization problem. The cross-modal coherence implemented by the grammar formalism is motivated by known phenomena, such as cross-modal priming, or alignment between interlocutors. To optimize discourse coherence, central ideas of Centering Theory are implemented using the grammar formalism. Finally, novel methods and a ready-to-use implementation are introduced which allow user interface developers to inspect, maintain and extend grammars. The formalism and generation implementation is demonstrated with a grammar for a mobile, multimodal application, the Virtual Personal Assistant.},
  Author = {David Reitter},
  Date-Modified = {2008-01-28 16:54:12 +0000},
  Pubtype = {thesis},
  School = {University College Dublin},
  Title = {Hybrid Planning and Realization of Coherent Utterances for Multimodal Natural Language Dialogue Systems},
  Type = {MSc thesis},
  Year = {2004},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter2004hybrid-multimodal-generation_compressed.pdf},
}

@inproceedings{dolfing2005fasil,
  Abstract = {In the context of the FASiL project, we have studied natural language interactions in a unimodal (speech only) and multimodal (speech and graphics) interface to a personal information management database. We collected multilingual corpora to investigate these interactions in three languages: Portuguese, English and Swedish. The corpora are used to train language models, to update acoustic models, to study semantic concepts, multimodal interactions, and dialogue management strategies. The corpora are annotated in a uniform way, with timings, transcriptions, and semantics. In this paper, we report on the structure and design of the corpora which are now available via ELRA.},
  Author = {Hans Dolfing and David Reitter and Luis Almeida and Nuno Beires and Michael Cody and Rui Gomes and Kerry Robinson and Roman Zielinski},
  Booktitle = {Proceedings INTER/EUROSPEECH 2005},
  Date-Added = {2005-07-29 14:16:21 +0100},
  Date-Modified = {2008-01-28 16:54:12 +0000},
  Pubtype = {refereed-ws},
  Title = {The {FASiL} Speech and Multimodal Corpora},
  Year = {2005},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/dolfing2005fasilcorpora.pdf},
}

@inproceedings{reitter04mug,
  Abstract = {When grammar-based techniques for natural language generation (and analysis alike) find their way into collaborative projects or actual application, big grammars tend to become hard to extend and debug. The MUG system represents a new tool set with a graphical debugging environment for functional unification grammars, which is designed to help grammar developers inspect the results of their work. The particular formalism supported is Multimodal Functional Unification Grammar (MUG, [4]), which is similar to Functional Unification Grammars (FUG: [2], [1]), but supports several coordinated modes, such as voice prompts or structural and/or language-based screen displays. For each input description, the grammar can generate a range of coherent realization variants, which are ranked by a scoring function in order to optimize the output towards situational and device-related factors.},
  Address = {Brockenhurst, U.K.},
  Author = {David Reitter},
  Booktitle = {Third International Conference on Natural Language Generation. 2nd Volume; posters.},
  Date-Modified = {2008-01-28 16:54:12 +0000},
  Pubtype = {refereed-ws},
  Title = {A Development Environment for Multimodal Functional Unification Generation Grammars},
  Year = {2004},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter2004inlg.pdf},
}

@inproceedings{panttaja04eval,
  Abstract = {Adaptable multimodal systems are difficult to test. We present a methodology for evaluating parallel multimodal output which is generated in response to a specific set of user, device and situation constraints. Our method involves ranking of many potential output variants using a fitness function, and selecting well-differentiated variants for user testing. We focus on the generation of multiple variants of graphical user interfaces for small-screen devices and natural language voice output, within a system we term UI on the Fly.},
  Address = {Geneva, Switzerland},
  Author = {Erin Panttaja and David Reitter and Fred Cummins},
  Booktitle = {Proceedings of the DUMAS Workshop on Robust and Adaptive Information Processing for Mobile Speech Interfaces},
  Date-Modified = {2008-01-28 16:54:12 +0000},
  Pages = {69--76},
  Pubtype = {refereed-ws},
  Title = {The Evaluation of Adaptable Multimodal System Outputs},
  Year = {2004},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/panttaja-etal_evaluation_2004.pdf},
}

@inproceedings{reitter04ui,
  Abstract = {UI on the Fly is a system that dynamically presents coordinated multimodal content through natural language and a small-screen graphical user interface. It adapts to the user's preferences and situation. Multimodal Functional Unification Grammar (MUG) is a unification-based formalism that uses rules to generate content that is coordinated across several communication modes. Faithful variants are scored with a heuristic function.},
  Address = {Boston},
  Author = {David Reitter and Erin Panttaja and Fred Cummins},
  Booktitle = {Proceedings of Human Language Technology conference 2004 / North American chapter of the Association for Computational Linguistics (HLT/NAACL-04)},
  Date-Modified = {2008-01-28 16:54:12 +0000},
  Pubtype = {refereed-ws},
  Title = {{UI} on the Fly: Generating a Multimodal User Interface},
  Year = {2004},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter-etal_uifly_2004.pdf},
}

@mastersthesis{reitter2003rhetorical,
  Abstract = {Most text displays an internal coherence structure, which can be analyzed as a tree structure of relations that hold between short segments of text. We present a machine-learning governed approach to such an analysis in the framework of Rhetorical Structure Theory. Our rhetorical analyzer observes a variety of textual properties, such as cue phrases, part-of-speech information, rhetorical context and lexical chaining. A two-stage parsing algorithm uses local and global optimization to find an analysis. Decisions during parsing are driven by an ensemble of support vector classifiers. This training method allows for a non-linear separation of samples with many relevant features. We define a chain of annotation tools that profits from a new underspecified representation of rhetorical structure. Classifiers are trained on a newly introduced German language corpus, as well as on a large English one. We present evaluation data for the recognition of rhetorical relations.},
  Author = {David Reitter},
  Date-Modified = {2008-03-29 22:18:57 +0000},
  Note = {Best thesis award 2001/2002 at the GLDV'03 spring conference.},
  Pubtype = {thesis},
  School = {University of Potsdam},
  Title = {Rhetorical Analysis with Rich-Feature Support Vector Models},
  Type = {Diplomarbeit},
  Year = {2003},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter_rstsvm-thesis_2003.pdf},
}

@inproceedings{reitter2003urml,
  Abstract = {While quite a few linguistic corpora with syntactic annotations are available today, resources are scarce on the level of discourse annotation. A flexible, extendible annotation format speeds up development. We therefore propose an XML format for annotating rhetorical structure trees. In human and automatic analysis, rhetorical structure is often difficult and assigned incrementally. Thus, the format allows for underspecification. The paper discusses the various design decisions involved, illustrates the format with an example, and sketches some applications.},
  Address = {Budapest, Hungary},
  Author = {David Reitter and Manfred Stede},
  Booktitle = {Proceedings of the 4th International Workshop on Linguistically Interpreted Corpora (LINC-03) (at EACL 2003)},
  Date-Modified = {2008-01-28 16:54:12 +0000},
  Pages = {77--84},
  Pubtype = {refereed-ws},
  Title = {Step by step: underspecified markup in incremental rhetorical analysis},
  Year = {2003},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter-stede_urml_2003.pdf},
}

@article{reitter03analysis,
  Abstract = {Most text displays an internal coherence structure, which can be analyzed as a tree structure of relations that hold between short segments of text. We present a machine-learning governed approach to such an analysis in the framework of Rhetorical Structure Theory. Our rhetorical analyzer observes a variety of textual properties, such as cue phrases, part-of-speech information, rhetorical context and lexical chaining. A two-stage parsing algorithm uses local and global optimization to find an analysis. Decisions during parsing are driven by an ensemble of support vector classifiers. This training method allows for a non-linear separation of samples with many relevant features. We define a chain of annotation tools that profits from a new underspecified representation of rhetorical structure. Classifiers are trained on a newly introduced German language corpus, as well as on a large English one. We present evaluation data for the recognition of rhetorical relations.},
  Address = {St. Augustin, Germany},
  Annote = {Best Thesis Award of the Society for Computational Linguistics and Language Technology, Germany, 2003},
  Author = {David Reitter},
  Date-Modified = {2008-04-07 12:57:27 +0100},
  Journal = {LDV-Forum, GLDV Journal for Computational Linguistics and Language Technology},
  Number = {1/2},
  Pages = {38--52},
  Publisher = {Gardez},
  Pubtype = {refereed},
  Title = {Simple Signals for Complex Rhetorics: On rhetorical analysis with rich-feature support vector models},
  Volume = {18},
  Year = {2003},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter_complex-rst_2003.pdf},
}

@inproceedings{reitter2002guessing,
  Abstract = {I present a statistical-based approach to the part-of-speech guessing problem. I see assigning a part-of-speech, such as Adjective or Noun, as a classification problem. My guessing framework, which relies on automated learning of a language model, is described in detail. The rich feature analysis presented is suitable for linguistic data, such as the ones observed in German. I use a large margin classifier learning algorithm to select relevant features and learn appropriate labelling. The system is evaluated using a German corpus.},
  Address = {Potsdam, Germany},
  Author = {David Reitter},
  Booktitle = {Proceedings of the 12th Student Conference on Computational Linguistics ({TaCoS})},
  Date-Modified = {2008-01-28 16:54:12 +0000},
  Pubtype = {other-ws},
  Title = {Statistical Part-of-Speech Guessing for {German}: Support Vector Classifiers versus Voting},
  Year = {2002},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter_posguessing_2002.pdf},
}

@proceedings{tacos2002proceedings,
  Address = {Potsdam, Germany},
  Editor = {David Reitter},
  Publisher = {University of Potsdam},
  Pubtype = {other-ws},
  Title = {Proceedings of the 12th {Student} {Conference} on {Computational} {Linguistics} ({TaCoS})},
  Year = {2002},
}

@inproceedings{berger2002case,
  Abstract = {We describe our ongoing work on an application of XML/XSL technology to a dictionary, from whose source representation various views for the human reader as well as for automatic text generation and understanding are derived. Our case study is a dictionary of discourse markers, the words (often, but not always, conjunctions) that signal the presence of a discourse relation between adjacent spans of text.},
  Address = {Taipei, Taiwan},
  Author = {Daniela Berger and David Reitter and Manfred Stede},
  Booktitle = {Proceedings of the 2nd Workshop on NLP and XML (NLPXML-2002), (at COLING 2002)},
  Date-Modified = {2008-01-28 16:54:12 +0000},
  Pubtype = {refereed-ws},
  Title = {{XML}/{XSL} in the Dictionary: The Case of Discourse Markers},
  Year = {2002},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/bergeretal_xmldiscmarkers_2002.pdf},
}

@manual{reitter02latex,
  Abstract = {Drawing rhetorical analyses is no fun when you need to change and update diagrams as you refine your work, or, more importantly, if a lot of analyses are to be drawn. Voila, there we go: This package enables us to typeset beautiful diagrams with no hassle. It is oriented towards the style of the diagrams shown in Mann{\&}Thompson's Rhetorical Structure Theory and subsequent works. This package works perfectly with (LaTeX) and pdfLaTeX and does not require any special postscript capabilities in the output side.},
  Author = {David Reitter},
  Date-Modified = {2008-03-29 22:12:00 +0000},
  Note = {\url{http://www.david-reitter.com/compling/papers/reitter_rstpackage_2002.pdf} as of 02/2003},
  Pubtype = {other},
  Title = {Rhetorical theory in {LaTeX} with the `rst' package. {Technical} Manual},
  Year = {2002},
  Bdsk-Url-1 = {http://www.david-reitter.com/compling/papers/reitter_rstpackage_2002.pdf},
}

@inproceedings{reitter-hybrid,
  Abstract = {CyMON-NLU can inform, chat and gather user information using an advanced natural language understanding engine. It combines statistical morphosyntactic disambiguation methods (trigram tagging), a stemming algorithm and a robust parser for a large semantic grammar implemented in an XML formalism. The scalable CyMON-NLU engine is implemented in C++ and provides interfaces to the agent-based CRM platform CyMON. Further features include automatic language detection and dialog tracking using a semantic network interface. A development kit enables language engineers to easily create semantic grammars for the specific domain.},
  Address = {Heidelberg, Germany},
  Author = {David Reitter and Stefan Covaci and Florin Oltean and Catalin Bacanu and Traian Serbanuta},
  Booktitle = {Proceedings of the 11th Student Conference on Computational Linguistics (TaCoS)},
  Date-Modified = {2008-03-28 23:01:06 +0000},
  Pubtype = {refereed-ws},
  Title = {Hybrid Natural Language Processing in a Customer-Care Environment},
  Year = {2001},
}