initial commit
This commit is contained in:
parent
b85aae7271
commit
3d75e205fa
3 changed files with 191 additions and 0 deletions
114
parse_url.py
Normal file
114
parse_url.py
Normal file
|
@ -0,0 +1,114 @@
|
|||
#!/usr/bin/python3
|
||||
# -*- coding: utf8 -*-
|
||||
|
||||
# Copyright 2021 Frede Hundewadt
|
||||
#
|
||||
# Permission is hereby granted, free of charge,
|
||||
# to any person obtaining a copy of this software and
|
||||
# associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction,
|
||||
# including without limitation the rights to use,
|
||||
# copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software,
|
||||
# and to permit persons to whom the Software is furnished
|
||||
# to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice
|
||||
# shall be included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from operator import itemgetter
|
||||
import argparse
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import urllib
|
||||
|
||||
# Module-level list objects, created empty when the module is imported.
strings = []
entries = []
phones = []
contacts = []

# 118.dk search endpoints (append the URL-encoded search text to "where="):
#   person search   : https://www.118.dk/search/go?pageSize=100&page=1&listingType=residential&where=
#   business search : https://www.118.dk/search/go?pageSize=100&page=1&listingType=business&where=
#   search all      : https://www.118.dk/search/go?pageSize=100&page=1&listingType=&where=

# No reason to announce that this is a script, so a regular browser
# User-Agent header is sent with the request.
USER_AGENT = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:90.0) Gecko/20100101 Firefox/90.0",
}

# Search URL prefix for residential (person) listings.
SEARCH_118_URL = "https://www.118.dk/search/go?pageSize=100&page=1&listingType=residential&where="

SITE = "118.dk"
|
||||
|
||||
def parse_url(address, timeout=30.0):
    """Search 118.dk for *address* and return the contacts found.

    The search text is URL-encoded, appended to ``SEARCH_118_URL`` and
    fetched with the ``USER_AGENT`` headers. The returned HTML is reduced to
    its visible text, split into entries and merged into contacts.

    Every call works on its own local state, so results of earlier calls
    never leak into later ones.

    Args:
        address: Free-text search string, e.g. ``"Ryhaven, 8210"``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        A list of dicts with the keys ``"name"``, ``"address"`` and
        ``"phones"`` (a list), sorted by ``"address"``. The list is empty
        when nothing usable was found.

    Raises:
        urllib.error.URLError: If the request fails (an ``OSError`` subclass).
        OSError: On socket level errors, including timeouts.
    """
    url = f"{SEARCH_118_URL}{urllib.parse.quote_plus(address)}"
    req = urllib.request.Request(url=url, headers=USER_AGENT)
    with urllib.request.urlopen(req, timeout=timeout) as res:
        page = res.read()

    contacts = _merge_contacts(_extract_entries(page))
    return sorted(contacts, key=itemgetter("address"))


# Tags whose content is removed from the page before the text is read.
_NOISE_TAGS = ["script", "style", "ul", "input", "form", "title", "ins", "h1", "h2", "h4",
               "fieldset", "iframe", "strong", "img", "head", "meta", "link"]

# Text fragments starting with one of these prefixes are ignored.
_SKIP_PREFIXES = (
    "Geo",
    "FAG",
    "< til",
    "Tlf",
    "118.dk",
    "Vi kan",
    "Læs mere",
    "Copyright",
    "close",
    "Fjernelse",
    "Her kan",
    "Husnummer",
)

# Text that closes one entry on the result page.
_ENTRY_END = "se kort"


def _extract_entries(page):
    """Split the visible text of *page* into one tuple of strings per entry."""
    soup = BeautifulSoup(page, "html.parser", from_encoding='utf-8')
    for tag in soup(_NOISE_TAGS):
        tag.extract()

    entries = []
    current = []
    for string in soup.stripped_strings:
        if string.startswith(_SKIP_PREFIXES):
            continue
        if string == _ENTRY_END:
            entries.append(tuple(current))
            current = []
        else:
            current.append(string)
    # Text after the last end marker does not belong to a complete entry
    # and is dropped.
    return entries


def _merge_contacts(entries):
    """Build one contact dict per name, collecting each distinct phone number."""
    by_name = {}
    for entry in entries:
        # An entry needs name, address and number; shorter ones are skipped
        # explicitly instead of relying on a caught IndexError.
        if len(entry) < 3:
            continue
        name, address, number = entry[0], entry[1], entry[2]
        # Entries whose second field starts with "Reklame" are skipped
        # (presumably advertisements - "reklame" is Danish for advert).
        if str(address).startswith("Reklame"):
            continue

        contact = by_name.get(name)
        if contact is None:
            by_name[name] = {
                "name": name,
                "address": address,
                "phones": [number],
            }
        elif number not in contact["phones"]:
            contact["phones"].append(number)
        # NOTE(review): contacts are matched on name only, as before, so two
        # people with the same name at different addresses end up merged.
        # Confirm whether the address should be part of the key.
    return list(by_name.values())
|
||||
|
||||
if __name__ == '__main__':
    # Command line entry point: look up one address and print the contacts.
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--adresse", required=True, type=str, help="vejnavn [nr], postnummer")
    args = parser.parse_args()

    # The result used to be discarded, so running the script showed nothing.
    for result in parse_url(args.adresse):
        print(f"Adresse : {result['address']}")
        print(f" Navn : {result['name']}")
        for number in result["phones"]:
            print(f" Tlf : {number}")
        print("---------------------")
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
PySide6~=6.1.2
|
||||
beautifulsoup4~=4.9.3
|
||||
certifi
|
74
telefonbog.py
Normal file
74
telefonbog.py
Normal file
|
@ -0,0 +1,74 @@
|
|||
#!/usr/bin/python3
|
||||
# -*- coding: utf8 -*-
|
||||
|
||||
# Copyright 2021 Frede Hundewadt
|
||||
#
|
||||
# Permission is hereby granted, free of charge,
|
||||
# to any person obtaining a copy of this software and
|
||||
# associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction,
|
||||
# including without limitation the rights to use,
|
||||
# copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software,
|
||||
# and to permit persons to whom the Software is furnished
|
||||
# to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice
|
||||
# shall be included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
from PySide6 import QtCore, QtWidgets
|
||||
import sys
|
||||
import parse_url
|
||||
|
||||
|
||||
class Lookup(QtWidgets.QWidget):
    """Window with an address field, a lookup button and a result view.

    Pressing the button passes the entered address to
    ``parse_url.parse_url`` and shows the returned contacts as text.
    """

    # Hint describing the expected input format of the address field.
    ADDRESS_HINT = "vejnavn [nr], postnummer"

    def __init__(self):
        """Create the widgets, lay them out vertically and wire the button."""
        super().__init__()
        self.buttonLookup = QtWidgets.QPushButton("Lav opslag 118.dk")
        self.labelAddressHint = QtWidgets.QLabel(
            "Husnummer kan bruges som afgrænsning på større områder.\n"
            "Eksempel: Ryhaven, 8210 eller Bispehavevej 121, 8210")
        # Show the hint as placeholder text so the user can type right away
        # instead of first having to delete it.
        self.textAddress = QtWidgets.QLineEdit()
        self.textAddress.setPlaceholderText(self.ADDRESS_HINT)
        self.textResult = QtWidgets.QTextEdit()
        # NOTE(review): a family name string is passed where Qt documents a
        # QFont - confirm PySide6 converts it implicitly.
        self.textResult.setFont("monospace")

        # NOTE: this attribute hides the inherited QWidget.layout() method on
        # instances; the name is kept because callers may rely on it.
        self.layout = QtWidgets.QVBoxLayout(self)
        self.layout.addWidget(self.labelAddressHint)
        self.layout.addWidget(self.textAddress)
        self.layout.addWidget(self.buttonLookup)
        self.layout.addWidget(self.textResult)

        self.buttonLookup.clicked.connect(self.lookup)

    @QtCore.Slot()
    def lookup(self):
        """Run the lookup for the entered address and display the outcome.

        Shows "kan ikke findes" when no address was entered, "Ikke fundet"
        when the lookup returns nothing, and an error text when the request
        itself fails.
        """
        address = self.textAddress.text().strip()
        # Nothing to search for: empty input or the untouched hint text.
        if not address or address == self.ADDRESS_HINT:
            self.textResult.setText("kan ikke findes")
            return

        self.textResult.setText("Vent venligst ...")
        # parse_url blocks this thread; process pending events first so the
        # wait message is actually painted before the request starts.
        QtWidgets.QApplication.processEvents()

        try:
            results = parse_url.parse_url(address)
        except OSError as err:
            # urllib.error.URLError and socket timeouts derive from OSError.
            # Report the failure instead of leaving the wait message up.
            self.textResult.setText(f"Opslag fejlede: {err}")
            return

        if not results:
            self.textResult.setText("Ikke fundet")
            return

        lines = []
        for result in results:
            lines.append(f"Adresse : {result['address']}")
            lines.append(f" Navn : {result['name']}")
            lines.extend(f" Tlf : {number}" for number in result["phones"])
            lines.append("---------------------")
        # Every line, including the last, ends with a newline.
        self.textResult.setText("\n".join(lines) + "\n")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the lookup window
    # at 400x600 and hand the event loop's exit status to the interpreter.
    application = QtWidgets.QApplication([])

    window = Lookup()
    window.resize(400, 600)
    window.show()

    exit_status = application.exec()
    sys.exit(exit_status)
|
Loading…
Reference in a new issue