From 32319e1f257d4fd53a931f7d4576883c1e671ffe Mon Sep 17 00:00:00 2001 From: Xavier Fontanet Date: Sun, 5 Feb 2023 12:52:34 +0100 Subject: [PATCH] include DB and browser open CMD --- .gitignore | 1 + departiculares.py | 53 +++++++++++++++++++++++++++++++++--- departiculares.pyc | Bin 0 -> 3606 bytes test_mysql.py | 23 ++++++++++++++++ test_mysql_insert.py | 23 ++++++++++++++++ test_mysql_open_actives.py | 22 +++++++++++++++ test_mysql_open_seelater.py | 22 +++++++++++++++ 7 files changed, 140 insertions(+), 4 deletions(-) create mode 100644 .gitignore create mode 100644 departiculares.pyc create mode 100644 test_mysql.py create mode 100644 test_mysql_insert.py create mode 100644 test_mysql_open_actives.py create mode 100644 test_mysql_open_seelater.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e43b0f9 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.DS_Store diff --git a/departiculares.py b/departiculares.py index 9be31f9..46fea87 100644 --- a/departiculares.py +++ b/departiculares.py @@ -1,9 +1,18 @@ +#coding: utf-8 import requests import json from bs4 import BeautifulSoup import re from time import sleep +import webbrowser +import mysql.connector +from datetime import datetime +cnx = mysql.connector.connect( + user='root', password='1234', + host='192.168.1.22', port=33066, + database='departiculares_spider' +) class Anunci: def __init__(self, json_text, preu_capturat): @@ -13,6 +22,15 @@ class Anunci: self.url = json_tree['url'] self.preu = preu_capturat + def te_altura(self): + regex = ".*.tic(o|\s|\.).*" + compilador = re.compile(regex, re.IGNORECASE) + atico = compilador.match(desc) + regex = ".*d.plex.*" + compilador = re.compile(regex, re.IGNORECASE) + duplex = compilador.match(desc) + return atico or duplex + def te_piscina(self): regex = ".*pi(c|s|z)*ina.*" compilador = re.compile(regex, re.IGNORECASE) @@ -31,11 +49,34 @@ class Anunci: te = compilador.match(desc) return te + def es_chalet(self): + regex = ".*(chalet|casa).*" + compilador = re.compile(regex, re.IGNORECASE) + te = compilador.match(titol) + return te + + +cerca = "parquing_altura" headers = { "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36" } -base_url = "https://www.departiculares.com/alquiler/barcelona" +base_url = "https://www.departiculares.com/alquiler/barcelona&priceMax=1000" + +def tractar_anunci(anunci): + c0 = cnx.cursor() + c0.execute("SELECT count(*) FROM anuncis WHERE titol=%s", [anunci.titol]) + existance_count = c0.fetchone() + if existance_count[0] > 0: + print("--- Preexistent ---") + else: + c1 = cnx.cursor() + c1.execute("INSERT INTO anuncis (titol, preu, url, cerca, data_update, descripcio) VALUES (%s,%s,%s,%s,%s,%s)", [anunci_seleccionat.titol, anunci_seleccionat.preu, anunci_seleccionat.url, cerca, datetime.now().strftime('%Y-%m-%d %H:%M:%S'), anunci_seleccionat.descripcio]) + c1.close() + cnx.commit() + print("--- Guardat a BD ---") + c0.close() + pagina = 1 pagina_horitzo = 2 @@ -59,23 +100,25 @@ while pagina < pagina_horitzo: if preu_txt is not None: preu = int(preu_txt.text.strip().replace('.', '').replace('€', '')) - if preu <= 1000: + if preu > 400: res_json = resultat_item.find('script', type='application/ld+json') anunci = Anunci(res_json.string, preu) titol = anunci.titol desc = anunci.descripcio - if anunci.te_piscina() and anunci.te_parquing(): + if anunci.te_parquing(): anuncis_list.append(anunci) for anunci_seleccionat in anuncis_list: print("** Anunci **") print("Títol: " + anunci_seleccionat.titol) print("Descripció: " + anunci_seleccionat.descripcio) - print("Url: " + anunci_seleccionat.url) + print("Url: ") + print(anunci_seleccionat.url) if anunci_seleccionat.preu > 0: print("Preu: " + str(anunci_seleccionat.preu)) print("************") + tractar_anunci(anunci_seleccionat) resultats = soup.find('ul', class_="pager") resultats_item = resultats.find_all('a') @@ -92,5 +135,7 @@ while pagina < pagina_horitzo: else: print("sembla que no hi ha res a tractar") + pagina = pagina + 1 print("Fi de recorregut") +cnx.close() \ No newline at end of file diff --git a/departiculares.pyc b/departiculares.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d9967b900c1f2d304826784f3e05a47f94f7d65 GIT binary patch literal 3606 zcmd5;&2JmW6@R;wC{Z7lELovsJIUHkQ;N1oxpthdwy0t!PLM_|CXb~;op73xX$l*ZG$@>*WQM|78jXBCLwc5uh>Cyu z$t>wP0_Ukm&HEoGa}>_=6kCoasQ5D_n7$CeT1KQBr01y_o+C}vUm$&s6428)!wVNl z*GMl?|2!oNlq?E!iS!cnmni{riIQbuULbve5jbj4@ud&C{|Y7GKww1#nsfkgR%!G% z(oMSe%fC{%MtV&Sf+*ygB6pEt2``ZbQ62X?PhaLKl<2?8!1Q0EWR+muQ(x${F%7G+ zk!`v}$wfNysMz$zSAcVw5^RT6uall4{W|pnmc$|971CFzf0e?kLR_Qp4I#e6*A-uR zy!z&IdX@U$`)D+232caK7?|MaE*|>{28T%T=oA^`k@Fd@Cp9{iW7bZIqH0nj znM}yaL^YYDQ=eD_mJ=*w)we}$>k1P)$MMQmch>#tl<9^YV=vl*=T;ZCJ5mpgF7~07%CzeaPVVW4-Ce!66BjDaO?I%8n}h`uovXr` zxJ*@{Z6`l*(>NuHOlDjZvC@_Y@>aZg`Nr%-MutHUFLXcv{3pQwxi6T2FMN6egps~A zwiRJlxn`$@QcBzQJjPBtH?8>8o<3f0n@n}u;s!Vp8L1ru91jP%Nwom@$^Gr0^gg`1 zwez7MJ5Vk@6x`T5f?|*3QWgt+0Qgw>>I9Vx3pv-FJ!X81(@}103?tYs%P^8x2W|`A z8Dh48Q-N8hXdRcQN9!gVBi38OLT{Oe3p}jw@Ct^PsPvl5TjgPm7x0{E;Bb~O2&y

vWhYyhEWuv0au!e5+8|i3%PXYfu7RkU#tV0a7zSWr74d;>{el{Xk00 zp;lZBrS$D-3H{aVnqau#c%RAjMD?hdgYeO zQlgzQsoluYU9@F>$8r_@$FKijz64SA9_cc2yF1CX<&;LrPED-1_jJ?bI`;ick{)k@lULe=qt2$O}p8W|N?oOW^wcw}nr{5%Lb;My|*5 zqP5AK_=;awa?!ugd8*>-N}LeMdkF_Si)06aM0Hh<{nX<+DqZ4b(WzweT*DxCd?g;? z#YD$=aRG*6X9Xg+R4yaNX5~O5tSk3{bEchz&HsK?B!4Iwo`Nyw&3TLb)Rr&Rycxgl z*U@el@U4Tp49*Ig?+WM@ukO$D2zt}!`I6W08vd%+1b-4-{w{#uz@7!X<}@&0_80sa Ktj^aO{(k`l@D%$1 literal 0 HcmV?d00001 diff --git a/test_mysql.py b/test_mysql.py new file mode 100644 index 0000000..8b66a57 --- /dev/null +++ b/test_mysql.py @@ -0,0 +1,23 @@ +import mysql.connector +from datetime import datetime +import webbrowser + +print(datetime.now().strftime('%Y-%m-%d %H:%M:%S')) + +cnx = mysql.connector.connect( + user='root', password='1234', + host='192.168.1.22', port=33066, + database='departiculares_spider' +) + +c1 = cnx.cursor() +titol = "Piso en Alquiler en Carrer Cervantes de Òdena, Pisos Òdena" +url = "https://www.fotocasa.es/es/alquiler/vivienda/odena/parking-terraza-trastero/176573781/d?tti=3&ppi=3&xtor=AF-10012-[departiculares]-[general]-[NA]-[NA]-[NA]" +c1.execute("SELECT count(*) FROM anuncis WHERE titol = %s AND url = %s", [titol, url]) +myresult = c1.fetchone() + +if (myresult[0] > 0): + print("yes fuck") + +c1.close() +cnx.close() \ No newline at end of file diff --git a/test_mysql_insert.py b/test_mysql_insert.py new file mode 100644 index 0000000..cec232c --- /dev/null +++ b/test_mysql_insert.py @@ -0,0 +1,23 @@ +import mysql.connector +from datetime import datetime + +print(datetime.now().strftime('%Y-%m-%d %H:%M:%S')) + +cnx = mysql.connector.connect( + user='root', password='1234', + host='192.168.1.22', port=33066, + database='departiculares_spider' +) + +c1 = cnx.cursor() +c1.execute("INSERT INTO anuncis (titol, url, cerca, data_update, descripcio) VALUES (%s,%s,%s,%s,%s)", +['prova', 'prova', 'prova', datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'prova']) +c1.close() + +c2 = cnx.cursor() +c2.execute("INSERT INTO anuncis (titol, url, cerca, data_update, descripcio) VALUES (%s,%s,%s,%s,%s)", +['prova2', 'prova2', 'prova2', datetime.now().strftime('%Y-%m-%d %H:%M:%S'), 'prova2']) +c2.close() + +cnx.commit() +cnx.close() \ No newline at end of file diff --git a/test_mysql_open_actives.py b/test_mysql_open_actives.py new file mode 100644 index 0000000..0bec137 --- /dev/null +++ b/test_mysql_open_actives.py @@ -0,0 +1,22 @@ +import mysql.connector +from datetime import datetime +import webbrowser + +print(datetime.now().strftime('%Y-%m-%d %H:%M:%S')) + +cnx = mysql.connector.connect( + user='root', password='1234', + host='192.168.1.22', port=33066, + database='departiculares_spider' +) + +c1 = cnx.cursor() +c1.execute("SELECT url FROM anuncis WHERE actiu=1 AND veure_mes_tard=0") +myresult = c1.fetchall() + +for x in myresult: + webbrowser.open(x[0]) + +c1.close() + +cnx.close() \ No newline at end of file diff --git a/test_mysql_open_seelater.py b/test_mysql_open_seelater.py new file mode 100644 index 0000000..63e3c8d --- /dev/null +++ b/test_mysql_open_seelater.py @@ -0,0 +1,22 @@ +import mysql.connector +from datetime import datetime +import webbrowser + +print(datetime.now().strftime('%Y-%m-%d %H:%M:%S')) + +cnx = mysql.connector.connect( + user='root', password='1234', + host='192.168.1.22', port=33066, + database='departiculares_spider' +) + +c1 = cnx.cursor() +c1.execute("SELECT url FROM anuncis WHERE actiu=1 AND veure_mes_tard=1") +myresult = c1.fetchall() + +for x in myresult: + webbrowser.open(x[0]) + +c1.close() + +cnx.close() \ No newline at end of file