paritybit.ca

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 18412b032e4de0b35db284082ca53017f287a236
parent 9cf1f5319f0caa38c59f9001627de324316189ce
Author: Jake Bauer <jbauer@paritybit.ca>
Date:   Thu,  4 Jun 2020 01:02:27 -0400

Gopherize progress

Diffstat:
M gopherize | 26 ++++++++++++++++++++------
1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/gopherize b/gopherize
@@ -22,19 +22,25 @@ use strict;
 use warnings;
 our $VERSION = "0.1.0";
 
-use HTML::TreeBuilder 5 -weak;
-use HTML::Strip;
+use HTML::TreeBuilder;
 use File::Basename;
 
 my $tree = HTML::TreeBuilder->new();
-my $hs = HTML::Strip->new();
 my @files = @ARGV;
 my $output_path="public/gopher";
-my @links;
 my $fh;
 
 # Prints links to the bottom of the page;
 sub print_links {
+    my (@links) = @_;
+    print "Found ", scalar(@links), " links.\n";
+    foreach my $link (@links) {
+        print "Link: ", $link->[0], ".\n";
+        print "Obj: ", $link->[1], ".\n";
+        print "Prop ", $link->[2], ".\n";
+        print "Tag: ", $link->[3], ".\n";
+        print "=========\n"
+    }
     return;
 }
 
@@ -60,6 +66,13 @@ foreach my $file (@files) {
         next;
     }
 
+    print "Opening input file: $file\n";
+    open (my $orig_file, "<:encoding(UTF-8)", $file)
+        or do {
+            warn "Could not open input file $file: $!";
+            next;
+        };
+
     print "Creating output file: $output_path/$file_name.gph...\n";
     open($fh, ">", "$output_path/$file_name.gph")
         or do {
@@ -68,8 +81,9 @@ foreach my $file (@files) {
         };
 
     print "Parsing HTML from: $file...\n";
-    $tree->parse_file($file);
-    $tree->dump;
+    $tree->parse_file($orig_file)->elementify();
+    my $links_ref = $tree->extract_links("a", "img", "audio", "video");
+    print_links(@$links_ref);
 
     close($fh);
 }