paritybit.ca

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 478212cb79d6a185de8d69210c174817043e7c0b
parent ef822c8a1f62983930676ca8cbff5e74721278c8
Author: Jake Bauer <jbauer@paritybit.ca>
Date:   Sun, 31 May 2020 22:33:49 -0400

Progress on gopherize

Diffstat:
M gopherize | 61 +++++++++++++++++++++++++++++++++++++------------------------
1 file changed, 37 insertions(+), 24 deletions(-)

diff --git a/gopherize b/gopherize @@ -22,41 +22,54 @@ use strict; use warnings; our $VERSION = "0.1.0"; -package Gopherizer; -use base "HTML::Parser"; +use HTML::TreeBuilder 5 -weak; +use HTML::Strip; +use File::Basename; -our @Links; +my $tree = HTML::TreeBuilder->new(); +my $hs = HTML::Strip->new(); +my @files = @ARGV; +my $output_path="public/gopher"; +my @links; +my $fh; -sub text { - my ($self, $text) = @_; - print $text; +# Prints links to the bottom of the page; +sub print_links { + return; } -sub start { - my ($self, $tag, $attr, $attrseq, $origtext) = @_; - if ($tag eq 'a') { - push(@Links, $attr->{ href }); - } +# Trims excess spaces from HTML::Strip output +sub trim { + my ($string) = @_; + $string =~ tr/ / /s; + return $string; } -sub print_links { - my (@Links) = @_; - +# Create output directory +if (not -e $output_path and not -d $output_path) { + mkdir($output_path) or die "Could not mkdir $output_path: $!"; } -my $parser = new Gopherizer; -my @files = @ARGV; -my $output_path="gopher/"; -mkdir($output_path) or die "Could not mkdir $output_path: $!"; - +# Parse and gopherize each file foreach my $file (@files) { - print "Opening output $file...\n"; - open(our $fh, ">", "$file.gph") + + my ($file_name, $dirs, $suffix) = fileparse($file, qr/\.[^.]*/); + + if (not $suffix =~ /html?/) { + warn "$file does not have .html or .htm extension, refusing to parse.\n"; + next; + } + + print "Creating output file: $output_path/$file_name.gph...\n"; + open($fh, ">", "$output_path/$file_name.gph") or do { - warn "Could not open $file for writing: $!"; + warn "Could not open $output_path/$file_name.gph for writing: $!"; next; }; - print "Parsing HTML $file...\n"; - $parser->parse_file($file); + + print "Parsing HTML from: $file...\n"; + $tree->parse_file($file); + $tree->dump; + close($fh); }