#!/usr/bin/python3
# -*- coding: utf8 -*-

# Copyright 2021 Frede Hundewadt
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and
# associated documentation files (the "Software"),
# to deal in the Software without restriction,
# including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished
# to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice
# shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"""Scrape the 118.dk search result page for the listings at an address."""

from bs4 import BeautifulSoup
from operator import itemgetter
import argparse
import urllib.request
import urllib.parse
import urllib

# 118.dk
# person search  : https://www.118.dk/search/go?pageSize=100&page=1&listingType=residential&where=
# company search : https://www.118.dk/search/go?pageSize=100&page=1&listingType=business&where=
# search all     : https://www.118.dk/search/go?pageSize=100&page=1&listingType=&where=
# no reason to announce that this is a script
USER_AGENT = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:90.0) Gecko/20100101 Firefox/90.0"}
SEARCH_118_URL = "https://www.118.dk/search/go?pageSize=100&page=1&listingType=residential&where="
SITE = "118.dk"

# Tags that are removed from the page before its text is collected.
_NOISE_TAGS = [
    "script", "style", "ul", "input", "form", "title", "ins", "h1", "h2", "h4",
    "fieldset", "iframe", "strong", "img", "head", "meta", "link",
]

# Text fragments starting with any of these prefixes are ignored.
_NOISE_PREFIXES = (
    "Geo",
    "FAG",
    "< til",
    "Tlf",
    "118.dk",
    "Vi kan",
    "Læs mere",
    "Copyright",
    "close",
    "Fjernelse",
    "Her kan",
    "Husnummer",
)

# The text "se kort" ("see map") is treated as the end-of-entry marker.
_ENTRY_END = "se kort"

# Entries whose second field starts with this text are skipped.
# NOTE(review): presumably an advertising-related marker on 118.dk - confirm.
_AD_PREFIX = "Reklame"


def _fetch_page(address):
    """Download and return the raw 118.dk result page for *address*."""
    url = f"{SEARCH_118_URL}{urllib.parse.quote_plus(address)}"
    req = urllib.request.Request(url=url, headers=USER_AGENT)
    with urllib.request.urlopen(req) as res:
        return res.read()


def _extract_entries(page):
    """Return one tuple of text fragments per listing found in *page*."""
    soup = BeautifulSoup(page, "html.parser", from_encoding='utf-8')
    for tag in soup(_NOISE_TAGS):
        tag.extract()

    entries = []
    fields = []
    for string in soup.stripped_strings:
        if string.startswith(_NOISE_PREFIXES):
            continue
        if string == _ENTRY_END:
            entries.append(tuple(fields))
            fields = []
        else:
            fields.append(string)
    # Fragments after the last end marker do not form a complete entry and
    # are dropped instead of leaking into a later call.
    return entries


def _merge_contacts(entries):
    """Fold raw entries into contact dicts, one per (name, address) pair."""
    contacts = {}
    for entry in entries:
        # An entry needs name, address and phone number, in that order.
        if len(entry) < 3:
            continue
        name, address, number = entry[0], entry[1], entry[2]
        if str(address).startswith(_AD_PREFIX):
            continue
        person = contacts.setdefault(
            (name, address),
            {"name": name, "address": address, "phones": []},
        )
        # A repeated listing must neither duplicate the number nor the contact.
        if number not in person["phones"]:
            person["phones"].append(number)
    return list(contacts.values())


def parse_url(address):
    """Look up *address* on 118.dk and return the contacts found there.

    Every call starts from an empty state, so results of earlier lookups
    are never mixed into the returned list.

    Args:
        address: Free-text search string, e.g. "vejnavn [nr], postnummer".

    Returns:
        A list of dicts with the keys "name", "address" and "phones"
        (a list of numbers), sorted by "address".

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
    """
    entries = _extract_entries(_fetch_page(address))
    return sorted(_merge_contacts(entries), key=itemgetter("address"))


def main():
    """Command line entry point: look up one address and print the result."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--adresse", required=True, type=str, help="vejnavn [nr], postnummer")
    args = parser.parse_args()
    for result in parse_url(args.adresse):
        print(f"Adresse : {result['address']}")
        print(f" Navn : {result['name']}")
        for number in result["phones"]:
            print(f" Tlf : {number}")
        print("---------------------")


if __name__ == '__main__':
    main()
#!/usr/bin/python3
# -*- coding: utf8 -*-

# Copyright 2021 Frede Hundewadt
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and
# associated documentation files (the "Software"),
# to deal in the Software without restriction,
# including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished
# to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice
# shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"""PySide6 window that runs a 118.dk address lookup and shows the result."""

from PySide6 import QtCore, QtWidgets
import sys
import parse_url


class Lookup(QtWidgets.QWidget):
    """Window with an address field, a lookup button and a result view."""

    # Initial content of the address field; submitting it unchanged is rejected.
    ADDRESS_HINT = "vejnavn [nr], postnummer"

    def __init__(self):
        """Create the widgets, stack them vertically and wire the button."""
        super().__init__()
        self.buttonLookup = QtWidgets.QPushButton("Lav opslag 118.dk")
        self.labelAddressHint = QtWidgets.QLabel(
            "Husnummer kan bruges som afgrænsning på større områder.\n"
            "Eksempel: Ryhaven, 8210 eller Bispehavevej 121, 8210"
        )
        self.textAddress = QtWidgets.QLineEdit(self.ADDRESS_HINT)
        self.textResult = QtWidgets.QTextEdit()
        self.textResult.setFont("monospace")

        self.layout = QtWidgets.QVBoxLayout(self)
        self.layout.addWidget(self.labelAddressHint)
        self.layout.addWidget(self.textAddress)
        self.layout.addWidget(self.buttonLookup)
        self.layout.addWidget(self.textResult)

        self.buttonLookup.clicked.connect(self.lookup)

    @staticmethod
    def _format_results(results):
        """Render the contact dicts returned by parse_url as display text."""
        lines = []
        for result in results:
            lines.append(f"Adresse : {result['address']}")
            lines.append(f" Navn : {result['name']}")
            lines.extend(f" Tlf : {number}" for number in result["phones"])
            lines.append("---------------------")
        # Every line, including the last one, is newline-terminated.
        return "\n".join(lines) + "\n"

    @QtCore.Slot()
    def lookup(self):
        """Run the lookup for the entered address and display the outcome."""
        address = self.textAddress.text().strip()
        # Reject empty input as well as the untouched hint text.
        if not address or address == self.ADDRESS_HINT:
            self.textResult.setText("kan ikke findes")
            return

        self.textResult.setText("Vent venligst ...")
        # The lookup blocks this thread; let Qt handle pending events first
        # so the wait message is actually painted.
        QtWidgets.QApplication.processEvents()

        try:
            results = parse_url.parse_url(address)
        except OSError as err:
            # urllib's URLError/HTTPError derive from OSError; report the
            # failure instead of leaving the wait message on screen.
            self.textResult.setText(f"Opslag fejlede: {err}")
            return

        if not results:
            self.textResult.setText("Ikke fundet")
            return
        self.textResult.setText(self._format_results(results))


if __name__ == "__main__":
    app = QtWidgets.QApplication([])
    widget = Lookup()
    widget.resize(400, 600)
    widget.show()
    sys.exit(app.exec())