#!/usr/bin/perl
use strict;
use warnings;

# Terminate every print() with a newline.
# NOTE(review): $\ is a process-wide global, so it also applies to print()
# calls made inside the modules loaded below -- confirm that is intended.
$\ = "\n";

## just a simple perl module that I use so that I can avoid having to
## remember all the little details needed to open a database. mostly
## useful when I don't want to use something like DBIx::Recordset

package database;
use DBI;

### change these values to alter which database gets used
## add to this list if you want to use a different server.
my %databases = (
    SQLite => {
        dsn        => "dbi:SQLite:dbname=",
        defaultdb  => 'feedback.db',
        dbusername => '',
        dbpassword => '',
    },
);

## this sets which server to use by default
# Declared with "our" so it remains reachable as $database::defaultserver.
our $defaultserver = 'SQLite';

# new($class, $dbname, $dbserver)
#
# Opens a DBI connection and returns the DBI database handle itself (not an
# object blessed into this package).
#
#   $dbname   - optional database name appended to the server's DSN prefix;
#               falls back to that server's 'defaultdb' entry.
#   $dbserver - optional key into %databases; falls back to $defaultserver.
#
# The handle is created with RaiseError => 1 and AutoCommit => 0.
# Dies when the requested server has no entry in %databases.
sub new {
    my ($class, $dbname, $dbserver) = @_;

    my $servername = $dbserver || $defaultserver;
    my $server     = $databases{$servername}
        or die "Unknown database server '$servername'\n";

    my $dsn = $server->{dsn} . ($dbname || $server->{defaultdb});

    return DBI->connect(
        $dsn,
        $server->{dbusername},
        $server->{dbpassword},
        { RaiseError => 1, AutoCommit => 0 },
    );
}

1;

package feedback;
use Data::Dumper;

#open the database, and create the tables if they don't already exist
my $dbh    = database->new();
my @tables = $dbh->tables('%', '%', 'timeseries', 'TABLE');

unless ($tables[0]) {
    my $created = eval {
        print "Creating database tables";

        # Slurp the whole schema from the __DATA__ section at the end of this
        # file, then run it one ';'-separated statement at a time.
        local $/ = undef;
        my $schema = <DATA>;
        for my $statement (grep { /\S/ } split /;\n*/, $schema) {
            $dbh->do($statement);
        }
        1;
    };

    if ($created) {
        $dbh->commit();
    }
    else {
        warn "Transaction aborted: $@\n";
        $dbh->rollback();
        exit 1;    # report the failure to the caller instead of exiting 0
    }
}

# Schema setup ran inside a transaction; everything after this point commits
# statement by statement.
$dbh->{AutoCommit} = 1;

my $inserttime = $dbh->prepare("INSERT INTO timeseries (msgid, date) VALUES (?, ?)");

use Net::NNTP;
use News::Archive;
use News::Article;
use DateTime;
use DateTime::Format::Strptime;

my $group = "mozilla.feedback";
my $strp  = DateTime::Format::Strptime->new(
    pattern   => '%a, %d %b %Y %T %z',
    locale    => 'en_US',
    time_zone => 'UTC',
);

# open or initialize the archive
my $archive = News::Archive->new('basedir' => '.', 'debug' => 0);
$archive->subscribe($group) unless $archive->subscribed($group);
$archive->group($group);

# get all of the message ids in the group
my $nntp = Net::NNTP->new("news.mozilla.org")
    or die "Cannot connect to news.mozilla.org\n";
my $listed = $nntp->listgroup($group)
    or die "Cannot list group $group: " . $nntp->message;
my @msgs = @{$listed};

my $total    = scalar @msgs;
my $progress = 0;
print "$total to download.";

# download, save and analyze the messages that we don't already have
for my $msgnum (@msgs) {
    $progress++;

    # Report every 100th message and the final one.
    if ($progress % 100 == 0 || $progress == $total) {
        my $percent = sprintf("%.2f", $progress / $total * 100);
        print "$progress/$total ($percent%)";
    }

    # A true nntpstat() result is taken to mean the archive already holds
    # this message, so it is skipped.
    next if $archive->nntpstat($msgnum);

    my $msg = $nntp->article($msgnum);
    unless ($msg) {
        warn "Could not fetch article $msgnum from $group, skipping\n";
        next;
    }

    analyze($msgnum, $msg);
    $archive->save_article($msg, $group);
}

# analyze($msgnum, $source)
#
# Builds a News::Article from $source (the value returned by
# Net::NNTP->article), parses its Date header with $strp and records
# ($msgnum, parsed date) in the timeseries table through the prepared
# $inserttime statement.  The date is bound as undef when the header is
# missing or parse_datetime() returns nothing.
sub analyze {
    my ($msgnum, $source) = @_;

    my $article    = News::Article->new($source);
    my $datestring = $article->header('Date');
    my $date       = defined $datestring
        ? $strp->parse_datetime($datestring)
        : undef;

    # NOTE(review): $date is handed to DBI as a DateTime object; presumably
    # the driver stores its string form -- confirm.
    $inserttime->execute($msgnum, $date);

    #my $subarticle = News::Article->new($article->body());
    #my ($name, $email, $product, $summary) = $subarticle->headers(['Name', 'Email', 'Product', 'Summary']);
    #my $comments = cleancomments($subarticle->body());
    return;
}

# cleancomments(@lines)
#
# Returns the arguments with every element that consists solely of
# "Comments:" removed.  Only referenced from commented-out code above.
sub cleancomments {
    return grep { !/^Comments:$/ } @_;
}

1;

__DATA__
CREATE TABLE timeseries (
    msgid INTEGER PRIMARY KEY,
    date TEXT
)