#! /usr/bin/perl

# $Id: gnome-bbs2html2,v 1.15 2002/05/23 05:52:40 mickn Exp $

use strict;
use Jcode;
use Getopt::Std;
use DB_File;
use vars qw($BRD_FILE $BRD_HTML $BRD_DB
	    $CACHE_DIR $SUBJ_MODE $BdToUrl $opt_f $opt_n $CGISUB);
use vars qw($REVISION);

$REVISION = q,$Revision: 1.15 $,;

getopts('fn');

$BRD_FILE = '/etc/2ch/2channel.brd';
$CACHE_DIR = sprintf("%s/.2ch", $ENV{HOME});
$BRD_HTML = "$CACHE_DIR/2channel.html";
$BRD_DB = "$CACHE_DIR/2channel.db";
$SUBJ_MODE = 1;
$CGISUB = 'test';

$BdToUrl = \&bd2url_new2ch;

if (! -d $CACHE_DIR) {
    mkdir $CACHE_DIR, 0700;
}

&main();

exit 0;

sub main
{
    my $origarg = '';
    my $url = '';
    my $host = '';
    my $param = '';
    my $board = '';
    my $subj_type = 0;
    if (@ARGV) {
	$origarg = $url = $ARGV[0];
    }
    if ($url =~ m,^//2ch\.net/?$,) {
	&list_boards_2ch_net();
	return 0;
    } elsif ($url =~ m,^//slashdot\.jp/?$,) {
	&list_boards_slashdot('slashdot.jp', 'rdf');
	return 0;
    } elsif ($url =~ m,^//slashdot\.org/?$,) {
	&list_boards_slashdot('slashdot.org', 'rss');
	return 0;
    }
    if ($url =~ /^(.*);(order=coll)$/) {
	$url = $1;
	$param = $2;
    }
    if ($url =~ m,//([-a-z0-9]+\.2ch\.net)/([^/]+)/?$,i) {
	$host = lc($1);
	$url = '';
	$board = $2;
	$BdToUrl = \&bd2url_new2ch;
	$subj_type = 1;
    } elsif ($url =~ m,//(www2\.kitanet\.ne\.jp)/(~mail/2ch)/([^/]+)/?$,i) {
	$host = $1;
	$url = $2;
	$board = $3;
	$BdToUrl = \&bd2url_old2ch;
     } elsif ($url =~ m,//(.*\.bbspink\.com)/([^/]+)/?$,i) {
	$host = $1;
	$url = '';
	$board = $2;
	$BdToUrl = \&bd2url_new2ch;
	$subj_type = 1;
     } elsif ($url =~ m,//(.*\.2ch2\.net)(/([^/]+))?/([^/]+)/?$,i) {
	$host = $1;
	$url = $3;
	$board = $4;
	$BdToUrl = \&bd2url_old2ch;
    } elsif ($url =~ m,//(.*\.onputan\.com)(.*)/([^/]+)/?$,i) {
	$host = $1;
	$url = $2;
	$board = $3;
	$url =~ s,^/,,;
	$BdToUrl = \&bd2url_old2ch;
    } elsif ($url =~ m,//(.*\.f2s\.com)(.*)/([^/]+)/?$,i) {
	$host = $1;
	$url = $2;
	$board = $3;
	$url =~ s,^/,,;
	$BdToUrl = \&bd2url_old2ch;
    } elsif ($url =~ m,//(.*\.infoseek\.co\.jp)(.*)/([^/]+)/?$,i) {
	$host = $1;
	$url = $2;
	$board = $3;
	$url =~ s,^/,,;
	$BdToUrl = \&bd2url_old2ch;
    } elsif ($url =~ m,//(.*\.xrea\.com)(.*)/([^/]+)/?$,i) {
	$host = $1;
	$url = $2;
	$board = $3;
	$url =~ s,^/,,;
	$BdToUrl = \&bd2url_old2ch2;
    } elsif ($url =~ m,//(www\.shitaraba\.com)/(bbs)/([^/]+)/?$,i) {
	$host = $1;
	$url = $2;
	$board = $3;
	$BdToUrl = \&bd2url_shitaraba;
    } elsif ($url =~ m,//([-a-z0-9]+\.jbbs\.net)/([^/]+)/([^/]+)/?$,i) {
	$host = $1;
	$url = $2;
	$board = $3;
	$BdToUrl = \&bd2url_jbbs;
    } elsif ($url =~ m,//([-a-z0-9\.]+)(.*)/([^/]+)/?$,i) {
	$host = $1;
	$url = $2;
	$board = $3;
	$url =~ s,^/,,;
	if ($host =~ /^exbbs/) {
	    $CGISUB = 'cgi';
	} elsif ($host eq 'mitinoku.jp') {
	    $CGISUB = '';
	}
	$BdToUrl = \&bd2url_old2ch;
    } else {
	$url =~ s/&/&amp;/g;
	$url =~ s/\042/&quot;/g;
	$url =~ s/</&lt;/g;
	$url =~ s/>/&gt;/g;
	print "<html><body><h1>Error!</h1><code>$origarg</code></body></html>\n";
	return;
    }
    my $board_title = '';
    my %db;
    if (tie %db, 'DB_File', $BRD_DB, O_RDONLY, 0644, $DB_HASH) {
	$board_title = "bbs:" . $db{$origarg};
	untie %db;
    } else {
	$board_title = "bbs:$board";
    }
    &head($board_title);
    &list_subjects_2ch($host, $url, $board, $param, $subj_type);
    &foot();
}

sub list_subjects_2ch
{
    my ($host, $url, $board, $param, $subj_type) = @_;

    my $cache = sprintf("%s/%s_%s_subject.txt", $CACHE_DIR, $host, $board);
    if ($opt_f) {
	&get_subject_txt($host, $url, $cache, $subj_type);
    } elsif (!$opt_n) {
	if (! -f $cache) {
	    &get_subject_txt($host, $url, $board, $cache, $subj_type);
	} else {
	    my $mtime = (stat(_))[9];
	    if (time - $mtime > 60) {
		&get_subject_txt($host, $url, $board, $cache, $subj_type);
	    }
	}
    }
    my $file = '';
    my $conv = 0;
    my $ostr = '';
    if ($param eq 'order=coll') {
	$file = "${cache}.coll";
	$url = "/$url" if $url ne '';
	printf("<p>[<a href=\"bbs://%s%s/%s/\">active 順</a>]</p>\n",
	       $host, $url, $board);
    } else {
	$file = $cache;
	$conv = 1;
	$url = "/$url" if $url ne '';
	printf("<p>[<a href=\"bbs://%s%s/%s/;order=coll\">サブジェクト順</a>]</p>\n",
	       $host, $url, $board);
    }
    open FH, $file or return;
    while (<FH>) {
	if (/^([^,]+),(\d+),(.*)$/) {
	    $ostr = sprintf("<a href=\"%s\" target=\"thread\">%s (%d)</a><br>\n",
		    &$BdToUrl($host, $url, $board, $1), $3, $2);
	    print Jcode->new($ostr, 'euc')->utf8;
	}
    }
    close FH;
}

sub bd2url_new2ch
{
    my ($host, $url, $board, $key) = @_;
    return sprintf("http://%s/test/read.cgi/%s/%d/l50", $host, $board, $key);
}

sub bd2url_old2ch
{
    my ($host, $url, $board, $key) = @_;
    $CGISUB = "/$CGISUB" if $CGISUB ne '';
    return sprintf("http://%s%s%s/read.cgi?bbs=%s&key=%s&ls=50",
		   $host, $url, $CGISUB, $board, $key);
}

sub bd2url_old2ch2
{
    my ($host, $url, $board, $key) = @_;
    return sprintf("http://%s%s/read.cgi?bbs=%s&key=%s&ls=50",
		   $host, $url, $board, $key);
}

sub bd2url_shitaraba
{
    my ($host, $url, $board, $key) = @_;
    return sprintf("http://www.shitaraba.com/cgi-bin/read.cgi?key=%s&bbs=%s&ls=50", $key, $board);
}

sub bd2url_jbbs
{
    my ($host, $url, $board, $key) = @_;
    return sprintf("http://%s/%s/bbs/read.cgi?BBS=%s&KEY=%s&LAST=50",
		   $host, $url, $board, $key);
}

sub get_subject_txt
{
    my ($host, $url, $board, $cache, $subj_type) = @_;

    my @s = ();
    if ($subj_type) {
	my $subj_url = sprintf("http://%s/%s/%s/subback.html",
			       $host, $url, $board);
	&get_file_by_http($subj_url, "${cache}.in");
	&parse_subback_html($cache, \@s);
    } else {
	my $subj_url = sprintf("http://%s/%s/%s/subject.txt",
			       $host, $url, $board);
	&get_file_by_http($subj_url, "${cache}.in");
	&parse_subject_txt($cache, \@s);
    }

    open OUT, "> ${cache}.coll" or return;
    foreach my $s (sort bysubj @s) {
	print OUT $s, "\n";
    }
    close OUT;
}

sub parse_subback_html
{
    my ($cache, $subj_array) = @_;
    open FH, "${cache}.in" or return;
    open OUT, "> $cache" or return;
    @$subj_array = ();
    while (<FH>) {
	my $s = Jcode->new($_, 'sjis')->euc;
	if ($s =~ m,<a href="(\d+)\/.*?">\d+(?::|：)\s*(.*)\((\d+)\)</a>,) {
	    my ($key, $subj, $renum) = ($1, $2, $3);
	    my $s = sprintf "%s,%d,%s", $key, $renum, $subj;
	    print OUT $s, "\n";
	    &cleanup_subj(\$subj);
	    push @$subj_array, $s;
	}
    }
    close FH;
    close OUT;
}

sub parse_subject_txt
{
    my ($cache, $subj_array) = @_;

    open FH, "${cache}.in" or return;
    open OUT, "> $cache" or return;
    @$subj_array = ();
    while (<FH>) {
	chop if /\n$/;
	chop if /\r$/;
	$_ = Jcode->new($_)->euc;
	my ($key, $subj, $renum);
	if (/^(\d+)\.dat<>(.*)\s*\((\d+)\)$/) {
	    ($key, $subj, $renum) = ($1, $2, $3);
	} elsif (/^(\d+)\.dat,(.*)（(\d+)）$/) {
	    ($key, $subj, $renum) = ($1, $2, $3);
	} elsif (/^(\d+_\d+)<>(.*)<>(\d+)<><>NULL<>$/) {
	    ($key, $subj, $renum) = ($1, $2, $3);
	} elsif (/^(\d+)\.cgi,(.*)\((\d+)\)$/) {
	    ($key, $subj, $renum) = ($1, $2, $3);
	}
	my $s = sprintf "%s,%d,%s", $key, $renum, $subj;
	print OUT $s, "\n";
	&cleanup_subj(\$subj);
	push @$subj_array, $s;
    }
    close FH;
}

sub cleanup_subj
{
    my $s = shift;
    $$s =~ s/^(\s|　|○|●|□|■|◇|◆|☆|★|△|▲|▽|▼|♪|＿|\/|&hearts;)*//;
    $$s =~ s/(\s|　|○|●|□|■|◇|◆|☆|★|△|▲|▽|▼|♪|＿|\/|&hearts;)*$//;
}

sub normalize_subject
{
    my $s = shift;
    $$s =~ s/^(「|【|『)*//;
}

sub bysubj
{
    my ($sa) = ($a =~ /^\d+,\d+,(.*)$/);
    my ($sb) = ($b =~ /^\d+,\d+,(.*)$/);
    &normalize_subject(\$sa);
    &normalize_subject(\$sb);
    return $sa cmp $sb;
}

sub head
{
    my $title = shift;
    print <<"HTML";
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"> 
<html lang="ja">
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  <title>$title</title>
</head>
<body>
<h1>$title</h1>
HTML
;
}

sub foot
{
    print <<FOOT;
<hr>
<p><em>galeon-2ch</em> $REVISION</p>
</body>
</html>
FOOT
}

sub list_boards_2ch_net
{
    my $plain_mtime = (stat($BRD_FILE))[9];
    my $html_mtime = -e $BRD_HTML ? (stat($BRD_HTML))[9] : -1;
    my $db_mtime = -e $BRD_DB ? (stat($BRD_DB))[9] : -1;
    if ($plain_mtime > $html_mtime ||
	$plain_mtime > $db_mtime) {
	return unless (&convert_board_list());
    }

    &head("板一覧");
    print "<table width=\"90%\" cellpadding=\"3\" border=\"1\">\n";
    if (open HTML, $BRD_HTML) {
	while (<HTML>) {
	    print $_;
	}
	close HTML;
    } else {
	print "<p><font color=\"red\">Cannot open $BRD_HTML: $!</font></p>\n";
    }
    print "</table>\n";
    print "</center>\n";
    &foot();
}

sub convert_board_list
{
    open IFH, $BRD_FILE or return undef;
    unless (open OFH, "> $BRD_HTML") {
	close IFH;
	return undef;
    }
    unlink($BRD_DB) if -e $BRD_DB;
    my %db;
    unless (tie %db, 'DB_File', $BRD_DB, O_CREAT, 0644, $DB_HASH) {
	close IFH;
	close OFH;
	return undef;
    }
    my $ver = <IFH>;
    my $f = 0;
    my $c = 0;
    my $COLSIZE = 5;
    print OFH "<center>\n";
    while (<IFH>) {
	chop if /\n$/;
	chop if /\r$/;
	my @t = split /\t/, Jcode->new($_, 'sjis')->utf8;
	if (@t == 2) {
	    if ($c != 0) {
		printf OFH "<td colspan=\"%d\"></td>", $COLSIZE - $c;
		print  OFH "</tr>\n";
	    }
	    print OFH "</table>\n" if $f;
	    print OFH "<table width=\"90%\" cellpadding=\"3\" border=\"1\">\n";
	    printf(OFH "<tr><th colspan=\"%d\">%s</th></tr>\n",
		   $COLSIZE, $t[0]);
	    $f = 1;
	    $c = 0;
	} else {
	    my $url;
	    # 0     1        2         3
	    #  <tab>HOST<tab>BOARD<tab>NAME
	    if ($t[1] =~ /\.2ch\.net$/) {
		$url = sprintf("bbs://%s/%s/", $t[1], $t[2]);
	    } elsif ($t[1] =~ /^www2\.kitanet\.ne\.jp\//) {
		$url = sprintf("bbs://%s/%s/", $t[1], $t[2]);
	    } elsif ($t[1] =~ /^www\.shitaraba\.com\//) {
		$url = sprintf("bbs://%s/%s/", $t[1], $t[2]);
	    } elsif ($t[1] =~ /^[-0-9a-z]+\.jbbs\.net\//) {
		$url = sprintf("bbs://%s/%s/", $t[1], $t[2]);
	    } elsif ($t[1] =~ /^.*\.bbspink\.com/) {
		$url = sprintf("bbs://%s/%s/", $t[1], $t[2]);
	    } elsif ($t[1] =~ /^.*\.2ch2\.net/) {
		$url = sprintf("bbs://%s/%s/", $t[1], $t[2]);
	    } elsif ($t[1] =~ /^.*\.xrea\.com/) {
		$url = sprintf("bbs://%s/%s/", $t[1], $t[2]);
	    } elsif ($t[1] =~ /^.*\.onputan\.com/) {
		$url = sprintf("bbs://%s/%s/", $t[1], $t[2]);
	    } elsif ($t[1] =~ /^.*\.f2s\.com/) {
		$url = sprintf("bbs://%s/%s/", $t[1], $t[2]);
	    } elsif ($t[1] =~ /^.*\.infoseek\.co\.jp/) {
		$url = sprintf("bbs://%s/%s/", $t[1], $t[2]);
	    } elsif ($t[1] =~ /ahiru\.dip\.jp/) {
		next;
	    } else {
		$url = sprintf("bbs://%s/%s/", $t[1], $t[2]);
		#print STDERR "NOT Supported: $t[1]\n";
		#next;
	    }
	    if ($c == 0) {
		print OFH "<tr>";
	    }
	    my $width = int(100 / $COLSIZE);
	    $width = 100 - $width * ($COLSIZE - 1) if $c == $COLSIZE - 1;
	    printf OFH "<td width=\"%d%%\"><a href=\"%s\" target=\"ita\">%s</a></td>", $width, $url, $t[3];
	    if ($url =~ m,^bbs:(.*)$,) {
		$db{$1} = $t[3];
	    }
	    if ($c == $COLSIZE - 1) {
		print OFH "</tr>\n";
	    }
	    $c++;
	    $c = 0 if ($c >= $COLSIZE);
	}
    }
    close IFH;
    close OFH;
    untie %db;
    return 1;
}

sub list_boards_slashdot
{
    my ($domain, $ext) = @_;

    use XML::LibXSLT;
    use XML::LibXML;

    my $rdf = sprintf("%s/%s.%s", $CACHE_DIR, $domain, $ext);

    &get_file_by_http("http://${domain}/slashdot.${ext}", $rdf);

    my $parser = XML::LibXML->new();
    my $xslt = XML::LibXSLT->new();

    my $source = $parser->parse_file($rdf);
    my $style_doc = $parser->parse_file("/etc/2ch/${domain}.xsl");
    
    my $stylesheet = $xslt->parse_stylesheet($style_doc);

    my $results = $stylesheet->transform($source);

    print $stylesheet->output_string($results);
}

sub get_file_by_http
{
    my ($url, $file) = @_;

    require LWP::UserAgent;
    require HTTP::Request;
    my $ua = LWP::UserAgent->new;
    my $req = HTTP::Request->new('GET', $url);
    my $res = $ua->request($req, $file);
}

# end of script
