% Journal article (Information Sciences, vol. 195, 2012): spam filtering from
% e-mail headers only, using Hidden Markov Models.
@article{spam2012,
  author   = {Salcedo Campos, Francisco Javier and D{\'i}az Verdejo, Jes{\'u}s Esteban and Garc{\'i}a-Teodoro, Pedro},
  title    = {Segmental Parameterisation and Statistical Modelling of E-mail Headers for Spam Detection},
  journal  = {Information Sciences},
  volume   = {195},
  pages    = {45--61},
  year     = {2012},
  issn     = {0020-0255},
  doi      = {10.1016/j.ins.2012.01.022},
  url      = {http://www.sciencedirect.com/science/article/pii/S0020025512000412},
  keywords = {Spam detection,Hidden Markov Model,Mail header,Histogram},
  abstract = {Spammers exploit the popularity and low cost of e-mail services to
    send unsolicited messages (spam), which fill users' accounts and waste
    valuable resources. To combat this problem, many different spam filtering
    techniques have been proposed in the literature. Nevertheless, most current
    anti-spamming filtering schemes are based on detecting relevant terms or
    tokens in the entire message or in only the body, which implies an invasion
    of users' privacy. In this paper, a novel spam-filtering technique based
    solely on the information present in headers is introduced. In this
    approach, headers are considered as the result of a dynamic process that
    generates characters. The observed characters are treated as signals and
    parameterised in accordance with standard signal pre-processing techniques
    by extracting relevant parameters from the header. From this, Hidden Markov
    Models (HMMs) are considered for a spam detection system. The performance
    achieved by our proposal is evaluated and compared with that of other
    pattern classification paradigms used for spam filtering. The experimental
    results for SpamAssassin, TREC05 and CEAS 2008 Lab Evaluation improve on
    those results obtained with other widely used techniques, achieving up to
    98.42\% of spam detection while keeping the false positive rate below
    0.4\% and with the added advantages of using only information from the
    headers and being independent of the language in which the e-mail is
    written.},
}