Skip to content

Commit 11c91fc

Browse files
Actualizada la sintaxis de find_element
1 parent 71c5ff5 commit 11c91fc

1 file changed

Lines changed: 98 additions & 37 deletions

File tree

src/Cap2/scrap.ipynb

Lines changed: 98 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,60 @@
5959
"\n",
6060
"\n",
6161
"**Importante**\n",
62-
"Una vez que logremos ques e abra el navegador no debemos teclear nada en él, el control lo llevaremos desde el programa en Python"
62+
"Una vez que logremos que se abra el navegador no debemos teclear nada en él, ni cerrarlo; el control lo llevaremos desde el programa en Python"
63+
]
64+
},
65+
{
66+
"cell_type": "code",
67+
"execution_count": 6,
68+
"metadata": {},
69+
"outputs": [
70+
{
71+
"name": "stdout",
72+
"output_type": "stream",
73+
"text": [
74+
"Requirement already up-to-date: selenium in c:\\users\\rafa\\appdata\\roaming\\python\\python38\\site-packages (4.4.3)\n",
75+
"Requirement already satisfied, skipping upgrade: trio~=0.17 in c:\\users\\rafa\\appdata\\roaming\\python\\python38\\site-packages (from selenium) (0.21.0)\n",
76+
"Requirement already satisfied, skipping upgrade: certifi>=2021.10.8 in c:\\users\\rafa\\appdata\\roaming\\python\\python38\\site-packages (from selenium) (2022.9.14)\n",
77+
"Requirement already satisfied, skipping upgrade: urllib3[socks]~=1.26 in c:\\users\\rafa\\appdata\\roaming\\python\\python38\\site-packages (from selenium) (1.26.9)\n",
78+
"Requirement already satisfied, skipping upgrade: trio-websocket~=0.9 in c:\\users\\rafa\\appdata\\roaming\\python\\python38\\site-packages (from selenium) (0.9.2)\n",
79+
"Requirement already satisfied, skipping upgrade: idna in d:\\instalado\\anacondainstalado\\lib\\site-packages (from trio~=0.17->selenium) (2.10)\n",
80+
"Requirement already satisfied, skipping upgrade: sniffio in c:\\users\\rafa\\appdata\\roaming\\python\\python38\\site-packages (from trio~=0.17->selenium) (1.3.0)\n",
81+
"Requirement already satisfied, skipping upgrade: cffi>=1.14; os_name == \"nt\" and implementation_name != \"pypy\" in d:\\instalado\\anacondainstalado\\lib\\site-packages (from trio~=0.17->selenium) (1.14.3)\n",
82+
"Requirement already satisfied, skipping upgrade: sortedcontainers in d:\\instalado\\anacondainstalado\\lib\\site-packages (from trio~=0.17->selenium) (2.2.2)\n",
83+
"Requirement already satisfied, skipping upgrade: outcome in c:\\users\\rafa\\appdata\\roaming\\python\\python38\\site-packages (from trio~=0.17->selenium) (1.2.0)\n",
84+
"Requirement already satisfied, skipping upgrade: attrs>=19.2.0 in d:\\instalado\\anacondainstalado\\lib\\site-packages (from trio~=0.17->selenium) (20.3.0)\n",
85+
"Requirement already satisfied, skipping upgrade: async-generator>=1.9 in d:\\instalado\\anacondainstalado\\lib\\site-packages (from trio~=0.17->selenium) (1.10)\n",
86+
"Requirement already satisfied, skipping upgrade: PySocks!=1.5.7,<2.0,>=1.5.6; extra == \"socks\" in d:\\instalado\\anacondainstalado\\lib\\site-packages (from urllib3[socks]~=1.26->selenium) (1.7.1)\n",
87+
"Requirement already satisfied, skipping upgrade: wsproto>=0.14 in c:\\users\\rafa\\appdata\\roaming\\python\\python38\\site-packages (from trio-websocket~=0.9->selenium) (1.2.0)\n",
88+
"Requirement already satisfied, skipping upgrade: pycparser in d:\\instalado\\anacondainstalado\\lib\\site-packages (from cffi>=1.14; os_name == \"nt\" and implementation_name != \"pypy\"->trio~=0.17->selenium) (2.20)\n",
89+
"Requirement already satisfied, skipping upgrade: h11<1,>=0.9.0 in c:\\users\\rafa\\appdata\\roaming\\python\\python38\\site-packages (from wsproto>=0.14->trio-websocket~=0.9->selenium) (0.13.0)\n"
90+
]
91+
}
92+
],
93+
"source": [
94+
"# ejecutar esta casilla solo si queremos instalar o actualizar selenium; \n",
95+
"# muchas veces tras hacerlo deberemos hacer \"Kernel->restart\" para que los cambios se apliquen\n",
96+
"import sys\n",
97+
"!{sys.executable} -m pip install --upgrade --user selenium"
6398
]
6499
},
65100
{
66101
"cell_type": "code",
67102
"execution_count": 1,
68103
"metadata": {},
69-
"outputs": [],
104+
"outputs": [
105+
{
106+
"name": "stderr",
107+
"output_type": "stream",
108+
"text": [
109+
"<ipython-input-1-cfdeb6085767>:7: DeprecationWarning: executable_path has been deprecated, please pass in a Service object\n",
110+
" driver = webdriver.Chrome(executable_path=chromedriver,options=chrome_options)\n"
111+
]
112+
}
113+
],
70114
"source": [
71-
"chromedriver = \"c:/hlocal/tdm/chromedriver.exe\" # cambiar esta variable con el path a nuestro chromedriver\n",
115+
"chromedriver = \"./chromedriver.exe\" # cambiar esta variable con el path a nuestro chromedriver\n",
72116
"import os\n",
73117
"from selenium import webdriver # si da error, desde anaconda prompt hacer pip install --user selenium\n",
74118
"os.environ[\"webdriver.chrome.driver\"] = chromedriver\n",
@@ -87,7 +131,7 @@
87131
},
88132
{
89133
"cell_type": "code",
90-
"execution_count": 8,
134+
"execution_count": 2,
91135
"metadata": {},
92136
"outputs": [],
93137
"source": [
@@ -106,11 +150,12 @@
106150
},
107151
{
108152
"cell_type": "code",
109-
"execution_count": 9,
153+
"execution_count": 3,
110154
"metadata": {},
111155
"outputs": [],
112156
"source": [
113-
"coord = driver.find_element_by_link_text(\"COORDENADAS\")\n",
157+
"from selenium.webdriver.common.by import By\n",
158+
"coord = driver.find_element(By.LINK_TEXT,\"COORDENADAS\")\n",
114159
"coord.click()"
115160
]
116161
},
@@ -123,33 +168,31 @@
123168
},
124169
{
125170
"cell_type": "code",
126-
"execution_count": 10,
171+
"execution_count": 4,
127172
"metadata": {},
128173
"outputs": [],
129174
"source": [
130-
"lat = driver.find_element_by_id(\"ctl00_Contenido_txtLatitud\")\n",
131-
"lon = driver.find_element_by_id(\"ctl00_Contenido_txtLongitud\")\n",
175+
"lat = driver.find_element(By.ID,\"ctl00_Contenido_txtLatitud\")\n",
176+
"lon = driver.find_element(By.ID,\"ctl00_Contenido_txtLongitud\")\n",
132177
"latitud = \"41.545639 \"\n",
133178
"longitud = \"1.893817\"\n",
134179
"lat.send_keys(latitud)\n",
135180
"lon.send_keys(longitud)\n",
136181
"\n",
137-
"datos = driver.find_element_by_id(\"ctl00_Contenido_btnDatos\")\n",
182+
"datos = driver.find_element(By.ID,\"ctl00_Contenido_btnDatos\")\n",
138183
"datos.click()"
139184
]
140185
},
141186
{
142-
"cell_type": "code",
143-
"execution_count": null,
187+
"cell_type": "markdown",
144188
"metadata": {},
145-
"outputs": [],
146189
"source": [
147-
"Ahora supongamos que queremos saber el uso principal de este inmueble, para ellos buscamos "
190+
"Ahora supongamos que queremos determinar el uso principal de este inmueble, para ello buscamos "
148191
]
149192
},
150193
{
151194
"cell_type": "code",
152-
"execution_count": 11,
195+
"execution_count": 6,
153196
"metadata": {},
154197
"outputs": [
155198
{
@@ -163,10 +206,10 @@
163206
],
164207
"source": [
165208
"xpath = \"//*[./span/text()='Referencia catastral']//label\"\n",
166-
"etiqs = driver.find_element_by_xpath(xpath)\n",
209+
"etiqs = driver.find_element(By.XPATH,xpath)\n",
167210
"print(etiqs.text)\n",
168211
"xpath = \"//*[./span/text()='Uso principal']//label\"\n",
169-
"etiqs = driver.find_element_by_xpath(xpath)\n",
212+
"etiqs = driver.find_element(By.XPATH,xpath)\n",
170213
"print(etiqs.text)"
171214
]
172215
},
@@ -190,7 +233,7 @@
190233
"metadata": {},
191234
"outputs": [],
192235
"source": [
193-
"html = driver.find_element_by_xpath(\"/html\")\n",
236+
"html = driver.find_element(By.XPATH,\"/html\")\n",
194237
"print(html.text)"
195238
]
196239
},
@@ -207,9 +250,9 @@
207250
"metadata": {},
208251
"outputs": [],
209252
"source": [
210-
"head = driver.find_element_by_xpath(\"/html/head\")\n",
211-
"body = driver.find_element_by_xpath(\"/html/body\")\n",
212-
"html2 = body.find_element_by_xpath(\"/html\")"
253+
"head = driver.find_element(By.XPATH,\"/html/head\")\n",
254+
"body = driver.find_element(By.XPATH,\"/html/body\")\n",
255+
"html2 = body.find_element(By.XPATH,\"/html\")"
213256
]
214257
},
215258
{
@@ -225,7 +268,7 @@
225268
"metadata": {},
226269
"outputs": [],
227270
"source": [
228-
"hijos = driver.find_elements_by_xpath(\"/html/body/*\")\n",
271+
"hijos = driver.find_elements(By.XPATH,\"/html/body/*\")\n",
229272
"for element in hijos:\n",
230273
" print(element.tag_name)"
231274
]
@@ -243,7 +286,7 @@
243286
"metadata": {},
244287
"outputs": [],
245288
"source": [
246-
"divs = driver.find_elements_by_xpath(\"/html/body/*/div\")\n",
289+
"divs = driver.find_elements(By.XPATH,\"/html/body/*/div\")\n",
247290
"print(len(divs))"
248291
]
249292
},
@@ -253,7 +296,7 @@
253296
"metadata": {},
254297
"outputs": [],
255298
"source": [
256-
"divs = body.find_elements_by_xpath(\"./*/div\")\n",
299+
"divs = body.find_elements(By.XPATH,\"./*/div\")\n",
257300
"print(len(divs))"
258301
]
259302
},
@@ -270,7 +313,7 @@
270313
"metadata": {},
271314
"outputs": [],
272315
"source": [
273-
"divs = driver.find_elements_by_xpath(\"/html/body//div\")\n",
316+
"divs = driver.find_elements(By.XPATH,\"/html/body//div\")\n",
274317
"print(len(divs))"
275318
]
276319
},
@@ -280,7 +323,7 @@
280323
"metadata": {},
281324
"outputs": [],
282325
"source": [
283-
"labels = driver.find_elements_by_xpath(\"//label\")\n",
326+
"labels = driver.find_elements(By.XPATH,\"//label\")\n",
284327
"print(len(labels))"
285328
]
286329
},
@@ -293,13 +336,21 @@
293336
},
294337
{
295338
"cell_type": "code",
296-
"execution_count": null,
339+
"execution_count": 7,
297340
"metadata": {},
298-
"outputs": [],
341+
"outputs": [
342+
{
343+
"name": "stdout",
344+
"output_type": "stream",
345+
"text": [
346+
"7801701DF0070S0001QY \n"
347+
]
348+
}
349+
],
299350
"source": [
300351
"id = \"ctl00_Contenido_tblInmueble\"\n",
301-
"div = driver.find_element_by_id(id)\n",
302-
"label = div.find_element_by_xpath(\"//label\")\n",
352+
"div = driver.find_element(By.ID,id)\n",
353+
"label = div.find_element(By.XPATH,\"//label\")\n",
303354
"print(label.text)"
304355
]
305356
},
@@ -310,29 +361,39 @@
310361
"outputs": [],
311362
"source": [
312363
"xpath = \"//*[./span/text()='Referencia catastral']//label\"\n",
313-
"etiqs = driver.find_element_by_xpath(xpath)\n",
364+
"etiqs = driver.find_element(By.XPATH,xpath)\n",
314365
"print(etiqs.text)"
315366
]
316367
},
317368
{
318369
"cell_type": "code",
319-
"execution_count": null,
370+
"execution_count": 8,
320371
"metadata": {},
321-
"outputs": [],
372+
"outputs": [
373+
{
374+
"name": "stdout",
375+
"output_type": "stream",
376+
"text": [
377+
"\n",
378+
"\n",
379+
"\n"
380+
]
381+
}
382+
],
322383
"source": [
323-
"clase = driver.find_elements_by_xpath(\"(//label)[position()=3]\")\n",
384+
"clase = driver.find_elements(By.XPATH,\"(//label)[position()=3]\")\n",
324385
"print(clase[0].text)\n",
325386
"\n",
326-
"etiqs = driver.find_elements_by_xpath(\"//label\")\n",
387+
"etiqs = driver.find_elements(By.XPATH,\"//label\")\n",
327388
"print(etiqs[2].text)\n",
328389
"\n",
329-
"ulti = driver.find_elements_by_xpath(\"(//label)[last()]\")\n",
390+
"ulti = driver.find_elements(By.XPATH,\"(//label)[last()]\")\n",
330391
"print(ulti[0].text)\n"
331392
]
332393
},
333394
{
334395
"cell_type": "code",
335-
"execution_count": null,
396+
"execution_count": 9,
336397
"metadata": {},
337398
"outputs": [],
338399
"source": [

0 commit comments

Comments
 (0)