1
0
Fork 0

include DB and browser open CMD

This commit is contained in:
Xavier Fontanet 2023-02-05 12:52:34 +01:00
parent 790aff1adc
commit 32319e1f25
7 changed files with 140 additions and 4 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.DS_Store

View File

@ -1,9 +1,18 @@
#coding: utf-8
import requests import requests
import json import json
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import re import re
from time import sleep from time import sleep
import webbrowser
import mysql.connector
from datetime import datetime
# Module-level MySQL connection, opened as soon as the module is loaded;
# tractar_anunci() obtains its cursors from this shared connection.
# NOTE(review): user, password and host are hard-coded here -- consider
# reading them from configuration or the environment instead.
cnx = mysql.connector.connect(
user='root', password='1234',
host='192.168.1.22', port=33066,
database='departiculares_spider'
)
class Anunci: class Anunci:
def __init__(self, json_text, preu_capturat): def __init__(self, json_text, preu_capturat):
@ -13,6 +22,15 @@ class Anunci:
self.url = json_tree['url'] self.url = json_tree['url']
self.preu = preu_capturat self.preu = preu_capturat
def te_altura(self):
    """Tell whether the listing description mentions an attic or a duplex.

    Returns:
        The regex match object of the first pattern that matches (attic
        is tried first, then duplex), or ``None`` when neither matches,
        so the result can be used directly as a truth value.
    """
    # Read the description from the instance rather than from the
    # module-level ``desc`` variable, so the method gives the right answer
    # for any listing and not only for the one the scraping loop last
    # assigned to that global.
    descripcio = self.descripcio
    # The leading wildcard stands in for the first vowel, so both the
    # accented and the unaccented spelling are accepted.
    atico = re.match(r".*.tic(o|\s|\.).*", descripcio, re.IGNORECASE)
    duplex = re.match(r".*d.plex.*", descripcio, re.IGNORECASE)
    return atico or duplex
def te_piscina(self): def te_piscina(self):
regex = ".*pi(c|s|z)*ina.*" regex = ".*pi(c|s|z)*ina.*"
compilador = re.compile(regex, re.IGNORECASE) compilador = re.compile(regex, re.IGNORECASE)
@ -31,11 +49,34 @@ class Anunci:
te = compilador.match(desc) te = compilador.match(desc)
return te return te
def es_chalet(self):
    """Tell whether the listing title mentions a chalet or a house ("casa").

    Returns:
        The regex match object when the title matches (case-insensitive),
        otherwise ``None``.
    """
    # Use the instance's own title instead of the module-level ``titol``
    # variable, which only holds the listing the scraping loop touched last.
    return re.match(r".*(chalet|casa).*", self.titol, re.IGNORECASE)
# Search label; tractar_anunci() writes it to the ``cerca`` column of every
# row it inserts into the ``anuncis`` table.
cerca = "parquing_altura"
headers = { headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36" "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"
} }
base_url = "https://www.departiculares.com/alquiler/barcelona" base_url = "https://www.departiculares.com/alquiler/barcelona&priceMax=1000"
def tractar_anunci(anunci):
    """Store *anunci* in the ``anuncis`` table unless its title is already there.

    A listing is considered already known when a row with the same
    ``titol`` exists. Otherwise a new row is inserted (tagged with the
    module-level ``cerca`` label and the current timestamp) and committed.

    Args:
        anunci: listing object exposing ``titol``, ``preu``, ``url`` and
            ``descripcio`` attributes.
    """
    c0 = cnx.cursor()
    try:
        c0.execute("SELECT count(*) FROM anuncis WHERE titol=%s", [anunci.titol])
        existance_count = c0.fetchone()
        if existance_count[0] > 0:
            print("--- Preexistent ---")
            return

        c1 = cnx.cursor()
        try:
            # Insert the listing that was passed in. The previous code read
            # the global loop variable ``anunci_seleccionat`` here, which only
            # worked by accident when the caller happened to use that name.
            c1.execute(
                "INSERT INTO anuncis (titol, preu, url, cerca, data_update, descripcio) VALUES (%s,%s,%s,%s,%s,%s)",
                [
                    anunci.titol,
                    anunci.preu,
                    anunci.url,
                    cerca,
                    datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    anunci.descripcio,
                ],
            )
        finally:
            # Release the cursor even if the INSERT fails.
            c1.close()
        cnx.commit()
        print("--- Guardat a BD ---")
    finally:
        # Always release the lookup cursor, including on early return/error.
        c0.close()
pagina = 1 pagina = 1
pagina_horitzo = 2 pagina_horitzo = 2
@ -59,23 +100,25 @@ while pagina < pagina_horitzo:
if preu_txt is not None: if preu_txt is not None:
preu = int(preu_txt.text.strip().replace('.', '').replace('', '')) preu = int(preu_txt.text.strip().replace('.', '').replace('', ''))
if preu <= 1000: if preu > 400:
res_json = resultat_item.find('script', type='application/ld+json') res_json = resultat_item.find('script', type='application/ld+json')
anunci = Anunci(res_json.string, preu) anunci = Anunci(res_json.string, preu)
titol = anunci.titol titol = anunci.titol
desc = anunci.descripcio desc = anunci.descripcio
if anunci.te_piscina() and anunci.te_parquing(): if anunci.te_parquing():
anuncis_list.append(anunci) anuncis_list.append(anunci)
for anunci_seleccionat in anuncis_list: for anunci_seleccionat in anuncis_list:
print("** Anunci **") print("** Anunci **")
print("Títol: " + anunci_seleccionat.titol) print("Títol: " + anunci_seleccionat.titol)
print("Descripció: " + anunci_seleccionat.descripcio) print("Descripció: " + anunci_seleccionat.descripcio)
print("Url: " + anunci_seleccionat.url) print("Url: ")
print(anunci_seleccionat.url)
if anunci_seleccionat.preu > 0: if anunci_seleccionat.preu > 0:
print("Preu: " + str(anunci_seleccionat.preu)) print("Preu: " + str(anunci_seleccionat.preu))
print("************") print("************")
tractar_anunci(anunci_seleccionat)
resultats = soup.find('ul', class_="pager") resultats = soup.find('ul', class_="pager")
resultats_item = resultats.find_all('a') resultats_item = resultats.find_all('a')
@ -92,5 +135,7 @@ while pagina < pagina_horitzo:
else: else:
print("sembla que no hi ha res a tractar") print("sembla que no hi ha res a tractar")
pagina = pagina + 1
print("Fi de recorregut") print("Fi de recorregut")
cnx.close()

BIN
departiculares.pyc Normal file

Binary file not shown.

23
test_mysql.py Normal file
View File

@ -0,0 +1,23 @@
import mysql.connector
from datetime import datetime
import webbrowser
# Manual check: does a given (title, url) pair already exist in ``anuncis``?
# NOTE(review): connection credentials are hard-coded in this script.
print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

connexio = mysql.connector.connect(
    user='root',
    password='1234',
    host='192.168.1.22',
    port=33066,
    database='departiculares_spider',
)
cursor = connexio.cursor()

# Sample listing used as the lookup key.
titol_prova = "Piso en Alquiler en Carrer Cervantes de Òdena, Pisos Òdena"
url_prova = "https://www.fotocasa.es/es/alquiler/vivienda/odena/parking-terraza-trastero/176573781/d?tti=3&ppi=3&xtor=AF-10012-[departiculares]-[general]-[NA]-[NA]-[NA]"

cursor.execute(
    "SELECT count(*) FROM anuncis WHERE titol = %s AND url = %s",
    [titol_prova, url_prova],
)
fila = cursor.fetchone()
coincidencies = fila[0]
if coincidencies > 0:
    print("yes fuck")

cursor.close()
connexio.close()

23
test_mysql_insert.py Normal file
View File

@ -0,0 +1,23 @@
import mysql.connector
from datetime import datetime
# Manual check: insert two placeholder rows into ``anuncis`` and commit them.
# NOTE(review): connection credentials are hard-coded in this script.
print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

connexio = mysql.connector.connect(
    user='root',
    password='1234',
    host='192.168.1.22',
    port=33066,
    database='departiculares_spider',
)

sentencia = "INSERT INTO anuncis (titol, url, cerca, data_update, descripcio) VALUES (%s,%s,%s,%s,%s)"

# Each placeholder row gets its own cursor; both inserts share one commit.
for marcador in ('prova', 'prova2'):
    cursor = connexio.cursor()
    cursor.execute(
        sentencia,
        [marcador, marcador, marcador, datetime.now().strftime('%Y-%m-%d %H:%M:%S'), marcador],
    )
    cursor.close()

connexio.commit()
connexio.close()

View File

@ -0,0 +1,22 @@
import mysql.connector
from datetime import datetime
import webbrowser
# Open in the browser every stored listing that is active and NOT flagged
# "see later" (veure_mes_tard=0).
# NOTE(review): connection credentials are hard-coded in this script.
print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

connexio = mysql.connector.connect(
    user='root',
    password='1234',
    host='192.168.1.22',
    port=33066,
    database='departiculares_spider',
)
cursor = connexio.cursor()
cursor.execute("SELECT url FROM anuncis WHERE actiu=1 AND veure_mes_tard=0")

# The query selects a single column, so each row is a one-element sequence.
files = cursor.fetchall()
for (url_anunci,) in files:
    webbrowser.open(url_anunci)

cursor.close()
connexio.close()

View File

@ -0,0 +1,22 @@
import mysql.connector
from datetime import datetime
import webbrowser
# Open in the browser every stored listing that is active and flagged
# "see later" (veure_mes_tard=1).
# NOTE(review): connection credentials are hard-coded in this script.
print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

connexio = mysql.connector.connect(
    user='root',
    password='1234',
    host='192.168.1.22',
    port=33066,
    database='departiculares_spider',
)
cursor = connexio.cursor()
cursor.execute("SELECT url FROM anuncis WHERE actiu=1 AND veure_mes_tard=1")

# The query selects a single column, so each row is a one-element sequence.
files = cursor.fetchall()
for (url_anunci,) in files:
    webbrowser.open(url_anunci)

cursor.close()
connexio.close()