From e53bb63ff8adfbea152eb4b3d434b0d6f3253f6b Mon Sep 17 00:00:00 2001 From: Xavier Fontanet Date: Wed, 10 Jul 2024 14:12:28 +0200 Subject: [PATCH] some refinements --- departiculares.py | 24 ++++++++++++------- test_mysql_open_actives.py => open_actives.py | 4 +++- 2 files changed, 18 insertions(+), 10 deletions(-) rename test_mysql_open_actives.py => open_actives.py (85%) diff --git a/departiculares.py b/departiculares.py index 46fea87..7c20cc8 100644 --- a/departiculares.py +++ b/departiculares.py @@ -8,6 +8,8 @@ import webbrowser import mysql.connector from datetime import datetime +cerca = "altura" + cnx = mysql.connector.connect( user='root', password='1234', host='192.168.1.22', port=33066, @@ -23,16 +25,16 @@ class Anunci: self.preu = preu_capturat def te_altura(self): - regex = ".*.tic(o|\s|\.).*" + regex = ".*(À|Á|à|á|a)tic.*" compilador = re.compile(regex, re.IGNORECASE) - atico = compilador.match(desc) - regex = ".*d.plex.*" + atico = compilador.match(titol) + regex = ".*(d|D).plex.*" compilador = re.compile(regex, re.IGNORECASE) - duplex = compilador.match(desc) + duplex = compilador.match(titol) return atico or duplex def te_piscina(self): - regex = ".*pi(c|s|z)*ina.*" + regex = ".*pi(c|s|z|sc)ina.*" compilador = re.compile(regex, re.IGNORECASE) te = compilador.match(desc) return te @@ -41,6 +43,10 @@ class Anunci: regex = ".*p.r(qu|k)in.*" compilador = re.compile(regex, re.IGNORECASE) te = compilador.match(desc) + if te == False: + regex = ".*aparca.*" + compilador = re.compile(regex, re.IGNORECASE) + te = compilador.match(desc) return te def te_traster(self): @@ -50,18 +56,17 @@ class Anunci: return te def es_chalet(self): - regex = ".*(chalet|casa).*" + regex = ".*(C|c)(halet|asa) .*" compilador = re.compile(regex, re.IGNORECASE) te = compilador.match(titol) return te -cerca = "parquing_altura" headers = { "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36" } -base_url = "https://www.departiculares.com/alquiler/barcelona&priceMax=1000" +base_url = "https://www.departiculares.com/alquiler/barcelona/el-barcelones&priceMax=1200&sizeMin=60" def tractar_anunci(anunci): c0 = cnx.cursor() @@ -106,7 +111,8 @@ while pagina < pagina_horitzo: titol = anunci.titol desc = anunci.descripcio - if anunci.te_parquing(): + #if anunci.te_parquing() and anunci.te_piscina(): + if anunci.te_altura(): anuncis_list.append(anunci) for anunci_seleccionat in anuncis_list: diff --git a/test_mysql_open_actives.py b/open_actives.py similarity index 85% rename from test_mysql_open_actives.py rename to open_actives.py index 0bec137..b805979 100644 --- a/test_mysql_open_actives.py +++ b/open_actives.py @@ -15,7 +15,9 @@ c1.execute("SELECT url FROM anuncis WHERE actiu=1 AND veure_mes_tard=0") myresult = c1.fetchall() for x in myresult: - webbrowser.open(x[0]) + url = x[0] + print('opening '+url) + webbrowser.open(url) c1.close()