def tractar_anunci(anunci, connexio=None, nom_cerca=None):
    """Insert *anunci* into the ``anuncis`` table unless its title is already stored.

    A row counts as already stored when ``anuncis`` holds at least one row
    whose ``titol`` equals ``anunci.titol``. In that case nothing is written.
    Otherwise one row is inserted and the transaction is committed.

    Args:
        anunci: Object exposing ``titol``, ``preu``, ``url`` and
            ``descripcio`` attributes.
        connexio: DB-API connection to use. Defaults to the module-level
            ``cnx`` connection.
        nom_cerca: Value written to the ``cerca`` column. Defaults to the
            module-level ``cerca`` string.
    """
    if connexio is None:
        connexio = cnx
    if nom_cerca is None:
        nom_cerca = cerca

    c0 = connexio.cursor()
    try:
        c0.execute("SELECT count(*) FROM anuncis WHERE titol=%s", [anunci.titol])
        existance_count = c0.fetchone()
        if existance_count[0] > 0:
            print("--- Preexistent ---")
            return

        c1 = connexio.cursor()
        try:
            # Use the function argument here: the previous code read the
            # caller's global loop variable ``anunci_seleccionat`` and only
            # worked when that happened to be the same object.
            c1.execute(
                "INSERT INTO anuncis (titol, preu, url, cerca, data_update, descripcio) VALUES (%s,%s,%s,%s,%s,%s)",
                [
                    anunci.titol,
                    anunci.preu,
                    anunci.url,
                    nom_cerca,
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    anunci.descripcio,
                ],
            )
        finally:
            # Release the cursor even when the INSERT raises.
            c1.close()
        connexio.commit()
        print("--- Guardat a BD ---")
    finally:
        c0.close()
"""Manual check: insert two placeholder rows into ``anuncis`` and commit them."""
import mysql.connector
from datetime import datetime

TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
INSERT_SQL = "INSERT INTO anuncis (titol, url, cerca, data_update, descripcio) VALUES (%s,%s,%s,%s,%s)"

print(datetime.now().strftime(TIMESTAMP_FORMAT))

connection_settings = {
    'user': 'root',
    'password': '1234',
    'host': '192.168.1.22',
    'port': 33066,
    'database': 'departiculares_spider',
}
cnx = mysql.connector.connect(**connection_settings)

# Each placeholder fills every text column of its row; a fresh cursor is
# opened and closed per insert, and both inserts share a single commit.
for placeholder in ('prova', 'prova2'):
    cursor = cnx.cursor()
    cursor.execute(
        INSERT_SQL,
        [
            placeholder,
            placeholder,
            placeholder,
            datetime.now().strftime(TIMESTAMP_FORMAT),
            placeholder,
        ],
    )
    cursor.close()

cnx.commit()
cnx.close()
"""Manual helper: open in the web browser every URL selected from ``anuncis``
where ``actiu=1`` and ``veure_mes_tard=1``."""
import mysql.connector
from datetime import datetime
import webbrowser

SEE_LATER_QUERY = "SELECT url FROM anuncis WHERE actiu=1 AND veure_mes_tard=1"

print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

connection_settings = {
    'user': 'root',
    'password': '1234',
    'host': '192.168.1.22',
    'port': 33066,
    'database': 'departiculares_spider',
}
cnx = mysql.connector.connect(**connection_settings)

cursor = cnx.cursor()
cursor.execute(SEE_LATER_QUERY)
rows = cursor.fetchall()

# Every row holds a single column: the URL to open.
for row in rows:
    webbrowser.open(row[0])

cursor.close()

cnx.close()