commit 18412b032e4de0b35db284082ca53017f287a236
parent 9cf1f5319f0caa38c59f9001627de324316189ce
Author: Jake Bauer <jbauer@paritybit.ca>
Date: Thu, 4 Jun 2020 01:02:27 -0400
Gopherize progress
Diffstat:
1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/gopherize b/gopherize
@@ -22,19 +22,25 @@ use strict;
use warnings;
our $VERSION = "0.1.0";
-use HTML::TreeBuilder 5 -weak;
-use HTML::Strip;
+use HTML::TreeBuilder;
use File::Basename;
my $tree = HTML::TreeBuilder->new();
-my $hs = HTML::Strip->new();
my @files = @ARGV;
my $output_path="public/gopher";
-my @links;
my $fh;
# Prints links to the bottom of the page;
sub print_links {
+ my (@links) = @_;
+ print "Found ", scalar(@links), " links.\n";
+ foreach my $link (@links) {
+ print "Link: ", $link->[0], ".\n";
+ print "Obj: ", $link->[1], ".\n";
+ print "Prop ", $link->[2], ".\n";
+ print "Tag: ", $link->[3], ".\n";
+ print "=========\n"
+ }
return;
}
@@ -60,6 +66,13 @@ foreach my $file (@files) {
next;
}
+ print "Opening input file: $file\n";
+ open (my $orig_file, "<:encoding(UTF-8)", $file)
+ or do {
+ warn "Could not open input file $file: $!";
+ next;
+ };
+
print "Creating output file: $output_path/$file_name.gph...\n";
open($fh, ">", "$output_path/$file_name.gph")
or do {
@@ -68,8 +81,9 @@ foreach my $file (@files) {
};
print "Parsing HTML from: $file...\n";
- $tree->parse_file($file);
- $tree->dump;
+ $tree->parse_file($orig_file)->elementify();
+ my $links_ref = $tree->extract_links("a", "img", "audio", "video");
+ print_links(@$links_ref);
close($fh);
}