initial commit
This commit is contained in:
parent
b85aae7271
commit
3d75e205fa
3 changed files with 191 additions and 0 deletions
114
parse_url.py
Normal file
114
parse_url.py
Normal file
|
@ -0,0 +1,114 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
# -*- coding: utf8 -*-
|
||||||
|
|
||||||
|
# Copyright 2021 Frede Hundewadt
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge,
|
||||||
|
# to any person obtaining a copy of this software and
|
||||||
|
# associated documentation files (the "Software"),
|
||||||
|
# to deal in the Software without restriction,
|
||||||
|
# including without limitation the rights to use,
|
||||||
|
# copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
# and/or sell copies of the Software,
|
||||||
|
# and to permit persons to whom the Software is furnished
|
||||||
|
# to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice
|
||||||
|
# shall be included in all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
|
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from operator import itemgetter
|
||||||
|
import argparse
|
||||||
|
import urllib.request
|
||||||
|
import urllib.parse
|
||||||
|
import urllib
|
||||||
|
|
||||||
|
# Module-level scratch lists shared by the scraping code in this file.
strings = []
entries = []
phones = []
contacts = []

# 118.dk search endpoints:
#   residential search : https://www.118.dk/search/go?pageSize=100&page=1&listingType=residential&where=
#   business search    : https://www.118.dk/search/go?pageSize=100&page=1&listingType=business&where=
#   search everything  : https://www.118.dk/search/go?pageSize=100&page=1&listingType=&where=

# No reason to reveal that the request comes from a script, so a regular
# browser User-Agent header is sent.
USER_AGENT = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:90.0) Gecko/20100101 Firefox/90.0",
}
SEARCH_118_URL = "https://www.118.dk/search/go?pageSize=100&page=1&listingType=residential&where="
SITE = "118.dk"
|
||||||
|
|
||||||
|
# Text fragments on the result page that are page chrome rather than listing
# data; any stripped string starting with one of these is ignored.
_NOISE_PREFIXES = (
    "Geo",
    "FAG",
    "< til",
    "Tlf",
    "118.dk",
    "Vi kan",
    "Læs mere",
    "Copyright",
    "close",
    "Fjernelse",
    "Her kan",
    "Husnummer",
)

# Tags removed from the document before its text is read.
_STRIPPED_TAGS = [
    "script", "style", "ul", "input", "form", "title", "ins", "h1", "h2", "h4",
    "fieldset", "iframe", "strong", "img", "head", "meta", "link",
]


def _group_entries(page_strings):
    """Split the page's text fragments into one tuple per listing.

    A listing is terminated by the literal string "se kort". Fragments that
    follow the last terminator do not form a complete listing and are dropped.
    """
    grouped = []
    current = []
    for text in page_strings:
        if text.startswith(_NOISE_PREFIXES):
            continue
        if text == "se kort":
            grouped.append(tuple(current))
            current = []
        else:
            current.append(text)
    return grouped


def _merge_contacts(listings):
    """Turn listing tuples into contact dicts, merging phone numbers by name."""
    merged = []
    by_name = {}
    for listing in listings:
        # A usable listing carries at least name, address and phone number.
        if len(listing) < 3:
            continue
        name, street, number = listing[0], listing[1], listing[2]
        # Advertisements have "Reklame..." where the address would be.
        if street.startswith("Reklame"):
            continue
        known = by_name.get(name)
        if known is None:
            known = {"name": name, "address": street, "phones": [number]}
            by_name[name] = known
            merged.append(known)
        elif number not in known["phones"]:
            known["phones"].append(number)
    return merged


def parse_url(address):
    """Look up *address* on 118.dk and return the contacts found there.

    The residential search URL (``SEARCH_118_URL``) is requested with the
    URL-quoted address appended, the response is parsed with BeautifulSoup,
    and the remaining text fragments are grouped into listings.

    All working state is local to the call, so repeated calls are
    independent of each other and never return results of earlier searches.

    Args:
        address: Free-text address, e.g. "vejnavn [nr], postnummer".

    Returns:
        A list of dicts with the keys ``"name"``, ``"address"`` and
        ``"phones"`` (a list of numbers), sorted by ``"address"``. Listings
        sharing a name are merged into one contact holding every distinct
        phone number; the address of the first such listing is kept.

    Raises:
        urllib.error.URLError: If the request to 118.dk fails.
    """
    url = f"{SEARCH_118_URL}{urllib.parse.quote_plus(address)}"
    req = urllib.request.Request(url=url, headers=USER_AGENT)
    with urllib.request.urlopen(req) as res:
        page = res.read()

    soup = BeautifulSoup(page, "html.parser", from_encoding='utf-8')
    # Drop markup whose text is never part of a listing.
    for tag in soup(_STRIPPED_TAGS):
        tag.extract()

    listings = _group_entries(soup.stripped_strings)
    return sorted(_merge_contacts(listings), key=itemgetter("address"))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: look up one address and print the contacts.
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--adresse", required=True, type=str, help="vejnavn [nr], postnummer")
    args = parser.parse_args()
    # The lookup result used to be discarded, so the script printed nothing.
    for result in parse_url(args.adresse):
        print(f"Adresse : {result['address']}")
        print(f" Navn : {result['name']}")
        for number in result["phones"]:
            print(f" Tlf : {number}")
        print("---------------------")
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
PySide6~=6.1.2
|
||||||
|
beautifulsoup4~=4.9.3
|
||||||
|
certifi
|
74
telefonbog.py
Normal file
74
telefonbog.py
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
# -*- coding: utf8 -*-
|
||||||
|
|
||||||
|
# Copyright 2021 Frede Hundewadt
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge,
|
||||||
|
# to any person obtaining a copy of this software and
|
||||||
|
# associated documentation files (the "Software"),
|
||||||
|
# to deal in the Software without restriction,
|
||||||
|
# including without limitation the rights to use,
|
||||||
|
# copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
# and/or sell copies of the Software,
|
||||||
|
# and to permit persons to whom the Software is furnished
|
||||||
|
# to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice
|
||||||
|
# shall be included in all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||||
|
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
|
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
from PySide6 import QtCore, QtWidgets
|
||||||
|
import sys
|
||||||
|
import parse_url
|
||||||
|
|
||||||
|
|
||||||
|
class Lookup(QtWidgets.QWidget):
    """Window with an address field, a lookup button and a result view.

    Pressing the button runs ``parse_url.parse_url`` on the entered address
    and shows the returned contacts in the read-out text box.
    """

    def __init__(self):
        """Create the widgets, stack them vertically and wire the button."""
        super().__init__()
        self.buttonLookup = QtWidgets.QPushButton("Lav opslag 118.dk")
        self.labelAddressHint = QtWidgets.QLabel("Husnummer kan bruges som afgrænsning på større områder.\nEksempel: Ryhaven, 8210 eller Bispehavevej 121, 8210")
        # The hint text doubles as the initial content of the address field;
        # lookup() treats it as "nothing entered".
        self.textAddress = QtWidgets.QLineEdit("vejnavn [nr], postnummer")
        self.textResult = QtWidgets.QTextEdit()
        # NOTE(review): setFont is given a family name string rather than a
        # QFont - confirm PySide6 converts it as intended.
        self.textResult.setFont("monospace")

        self.layout = QtWidgets.QVBoxLayout(self)
        self.layout.addWidget(self.labelAddressHint)
        self.layout.addWidget(self.textAddress)
        self.layout.addWidget(self.buttonLookup)
        self.layout.addWidget(self.textResult)

        self.buttonLookup.clicked.connect(self.lookup)

    @QtCore.Slot()
    def lookup(self):
        """Run the lookup for the entered address and display the result.

        Shows "kan ikke findes" when no address was entered, "Ikke fundet"
        when the search returns no contacts, and an error message when the
        request itself fails.
        """
        address = self.textAddress.text().strip()
        # Nothing to search for: field is empty or still holds the hint text.
        if not address or address == "vejnavn [nr], postnummer":
            self.textResult.setText("kan ikke findes")
            return

        self.textResult.setText("Vent venligst ...")
        # The lookup below blocks the GUI thread; let Qt paint the message
        # first, otherwise it would never become visible.
        QtWidgets.QApplication.processEvents()

        try:
            results = parse_url.parse_url(address)
        except OSError as err:
            # urllib's URLError/HTTPError are OSError subclasses; report the
            # failure in the window instead of letting the slot raise.
            self.textResult.setText(f"Opslag fejlede: {err}")
            return

        if not results:
            self.textResult.setText("Ikke fundet")
            return

        lines = []
        for result in results:
            lines.append(f"Adresse : {result['address']}\n")
            lines.append(f" Navn : {result['name']}\n")
            for number in result["phones"]:
                lines.append(f" Tlf : {number}\n")
            lines.append("---------------------\n")
        self.textResult.setText("".join(lines))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: create the Qt application and the lookup window.
    application = QtWidgets.QApplication([])
    window = Lookup()
    window.resize(400, 600)
    window.show()
    # Run the Qt event loop and exit the process with its return code.
    exit_code = application.exec()
    sys.exit(exit_code)
|
Loading…
Reference in a new issue