browser.py

Working through the exercises at browser.engineering
git clone https://git.sr.ht/~jbauer/browser.py
Log | Files | Refs | README | LICENSE

commit 9e455cf38f2450f37387068feb8f9f1e83d76711
Author: Jake Bauer <jbauer@paritybit.ca>
Date:   Mon, 13 Feb 2023 16:12:03 -0500

Implement chapter 1 + HTTP/1.1

Diffstat:
A LICENSE | 38 ++++++++++++++++++++++++++++++++++++++
A README.md | 3 +++
A browser.py | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 113 insertions(+), 0 deletions(-)

diff --git a/LICENSE b/LICENSE @@ -0,0 +1,38 @@ +Code copied from the book: + +Copyright 2018-2023 Pavel Panchekha & Chris Harrelson. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +Code added by me to implement exercises: + +ISC License + +Copyright 2023 Jake Bauer <jbauer@paritybit.ca> + +Permission to use, copy, modify, and/or distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright notice +and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +THIS SOFTWARE. 
# diff --git a/README.md b/README.md
# @@ -0,0 +1,3 @@
# +# browser.py
# +
# +Working through the exercises at [browser.engineering](//browser.engineering).
#
# diff --git a/browser.py b/browser.py
# @@ -0,0 +1,72 @@
"""A minimal HTTP/1.1 client that prints the text content of a web page."""

import socket
import ssl


def _split_url(url):
    """Split *url* into ``(scheme, authority, path)``.

    ``authority`` is the ``host[:port]`` part exactly as written in the URL.
    A URL without a path component gets the path ``"/"``.

    Raises:
        ValueError: if *url* contains no ``://`` separator.
        AssertionError: if the scheme is neither ``http`` nor ``https``.
    """
    scheme, separator, rest = url.partition("://")
    if not separator:
        raise ValueError("URL has no scheme: {}".format(url))
    # Raised explicitly (instead of via ``assert``) so the check is still
    # performed when Python runs with -O, while keeping the exception type.
    if scheme not in ("http", "https"):
        raise AssertionError("Unknown scheme {}".format(scheme))
    authority, _, path = rest.partition("/")
    return scheme, authority, "/" + path


def _split_host_port(authority, default_port):
    """Return ``(host, port)`` for *authority*, using *default_port* if none is given."""
    host, colon, port = authority.partition(":")
    return host, (int(port) if colon else default_port)


def _read_response(response):
    """Parse an HTTP response from the text stream *response*.

    Returns:
        ``(headers, body)`` where ``headers`` maps lower-cased header names
        to their stripped values and ``body`` is the rest of the stream.

    Raises:
        ValueError: if the status line or a header line is malformed, or the
            stream ends before the blank line that terminates the headers.
        AssertionError: if the status is not ``200`` or the response uses a
            transfer or content encoding (neither is decoded here).
    """
    statusline = response.readline()
    parts = statusline.split(" ", 2)
    if len(parts) < 2:
        raise ValueError("Malformed status line: {!r}".format(statusline))
    status = parts[1].strip()
    # The reason phrase is optional; tolerate its absence.
    explanation = parts[2].strip() if len(parts) > 2 else ""
    if status != "200":
        raise AssertionError("{}: {}".format(status, explanation))

    headers = {}
    while True:
        line = response.readline()
        if line == "\r\n":
            break
        if not line:
            raise ValueError("Connection closed before end of headers")
        header, value = line.split(":", 1)
        headers[header.lower()] = value.strip()

    for unsupported in ("transfer-encoding", "content-encoding"):
        if unsupported in headers:
            raise AssertionError("Unsupported header: {}".format(unsupported))

    return headers, response.read()


def request(url):
    """Fetch *url* with an HTTP/1.1 GET request and return ``(headers, body)``.

    Only the ``http`` and ``https`` schemes are accepted. The default port is
    80 for ``http`` and 443 for ``https`` unless the URL names one explicitly.

    Raises:
        ValueError: if the URL or the response is malformed.
        AssertionError: for an unknown scheme, a non-200 status, or an
            encoded response body.
    """
    scheme, authority, path = _split_url(url)
    host, port = _split_host_port(authority, 80 if scheme == "http" else 443)

    s = socket.socket(
        family=socket.AF_INET,
        type=socket.SOCK_STREAM,
        proto=socket.IPPROTO_TCP,
    )
    try:
        if scheme == "https":
            ctx = ssl.create_default_context()
            # The port must already be stripped here: server_hostname is
            # the name used for SNI and certificate matching.
            s = ctx.wrap_socket(s, server_hostname=host)

        s.connect((host, port))

        # sendall() keeps writing until the whole request has been sent;
        # send() may stop after a partial write.
        s.sendall(
            "GET {} HTTP/1.1\r\n".format(path).encode("utf8")
            # The Host header carries the authority as written in the URL,
            # so an explicit port is preserved.
            + "Host: {}\r\n".format(authority).encode("utf8")
            + "Connection: close\r\n\r\n".encode("utf8")
        )

        with s.makefile("r", encoding="utf8", newline="\r\n") as response:
            return _read_response(response)
    finally:
        # Close the socket on every path, including errors.
        s.close()


def show(body):
    """Print *body* with everything between ``<`` and ``>`` removed.

    The angle brackets themselves are never printed, and no trailing
    newline is added.
    """
    visible = []
    in_angle = False
    for c in body:
        if c == "<":
            in_angle = True
        elif c == ">":
            in_angle = False
        elif not in_angle:
            visible.append(c)
    # One write instead of one print() call per character.
    print("".join(visible), end="")


def load(url):
    """Fetch *url* and print its text content."""
    _headers, body = request(url)
    show(body)


if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        sys.exit("usage: {} URL".format(sys.argv[0]))
    load(sys.argv[1])