Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 73 additions & 73 deletions Labs/Python_Notebooks/LAB2/Imbd_lab_students.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 1,
"id": "dc2d3ffc",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -47,7 +47,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"id": "b2aaba9b",
"metadata": {},
"outputs": [
Expand All @@ -62,14 +62,14 @@
"source": [
"# Configuramos las opciones de Chrome\n",
"chrome_options = Options()\n",
"# chrome_options.add_argument(\"--headless\")\n",
"#chrome_options.add_argument(\"--headless\")\n",
"chrome_options.add_argument(\"--start-maximized\")\n",
"chrome_options.add_argument(\"--lang=en-US\")\n",
"\n",
"# Iniciar el WebDriver de Chrome\n",
"# Pista: Crea una variable llamada 'driver' y asígnale la instancia de webdriver.Chrome(),\n",
"# pasando nuestras 'chrome_options' como argumento.\n",
"driver = # [...COMPLETA AQUÍ...]\n",
"driver = webdriver.Chrome(options=chrome_options)\n",
"\n",
"print(\"WebDriver iniciado con éxito.\")"
]
Expand All @@ -86,7 +86,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"id": "a92ee23e",
"metadata": {},
"outputs": [
Expand All @@ -95,7 +95,7 @@
"output_type": "stream",
"text": [
"Esperando a que la lista de películas cargue...\n",
"Lista de películas encontrada. Comenzando el scraping.\n"
"Lista de películas encontrada. ¡A scrapear!\n"
]
}
],
Expand All @@ -104,7 +104,7 @@
"url = \"https://www.imdb.com/chart/top/\"\n",
"\n",
"# Pista: El objeto 'driver' tiene un método para abrir una URL. ¿Cuál es?\n",
"# [...COMPLETA AQUÍ...]\n",
"driver.get(url)\n",
"\n",
"# Lista para guardar los datos de cada película\n",
"movies_data = []\n",
Expand All @@ -117,8 +117,8 @@
" \n",
" # Completa la espera para que el script se detenga hasta que la lista de películas sea visible.\n",
" # Pista: Usa EC.visibility_of_element_located() y pásale una tupla con el método de búsqueda (By) y el selector.\n",
" WebDriverWait(driver, 10).until(\n",
" EC.visibility_of_element_located(( # [...COMPLETA AQUÍ EL MÉTODO By Y EL SELECTOR...] ))\n",
"    WebDriverWait(driver, 10).until(  # Espera hasta 10 s a que se cumpla la condición\n",
" EC.visibility_of_element_located((By.CSS_SELECTOR, movie_list_selector))\n",
" )\n",
" print(\"Lista de películas encontrada. ¡A scrapear!\")\n",
"\n",
Expand All @@ -144,64 +144,64 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"id": "4d0d1eca",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Scraped: #1 The Shawshank Redemption\n",
" Scraped: #2 The Godfather\n",
" Scraped: #3 The Dark Knight\n",
" Scraped: #4 The Godfather Part II\n",
" Scraped: #5 12 Angry Men\n",
" Scraped: #6 The Lord of the Rings: The Return of the King\n",
" Scraped: #7 Schindler's List\n",
" Scraped: #8 Pulp Fiction\n",
" Scraped: #9 The Lord of the Rings: The Fellowship of the Ring\n",
" Scraped: #10 The Good, the Bad and the Ugly\n",
" Scraped: #11 Forrest Gump\n",
" Scraped: #12 The Lord of the Rings: The Two Towers\n",
" Scraped: #13 Fight Club\n",
" Scraped: #14 Inception\n",
" Scraped: #15 Star Wars: Episode V - The Empire Strikes Back\n",
" Scraped: #16 The Matrix\n",
" Scraped: #17 Goodfellas\n",
" Scraped: #18 Interstellar\n",
" Scraped: #19 One Flew Over the Cuckoo's Nest\n",
" Scraped: #20 Se7en\n",
" Scraped: #21 It's a Wonderful Life\n",
" Scraped: #22 The Silence of the Lambs\n",
" Scraped: #23 Seven Samurai\n",
" Scraped: #24 Saving Private Ryan\n",
" Scraped: #25 The Green Mile\n",
" Scraped: #26 City of God\n",
" Scraped: #27 Life Is Beautiful\n",
" Scraped: #28 Terminator 2: Judgment Day\n",
" Scraped: #29 Star Wars: Episode IV - A New Hope\n",
" Scraped: #30 Back to the Future\n",
" Scraped: #31 Spirited Away\n",
" Scraped: #32 The Pianist\n",
" Scraped: #33 Gladiator\n",
" Scraped: #34 Parasite\n",
" Scraped: #35 Psycho\n",
" Scraped: #36 The Lion King\n",
" Scraped: #37 Grave of the Fireflies\n",
" Scraped: #38 The Departed\n",
" Scraped: #39 Whiplash\n",
" Scraped: #40 Harakiri\n",
" Scraped: #41 The Prestige\n",
" Scraped: #42 American History X\n",
" Scraped: #43 Léon: The Professional\n",
" Scraped: #44 Spider-Man: Across the Spider-Verse\n",
" Scraped: #45 Casablanca\n",
" Scraped: #46 Cinema Paradiso\n",
" Scraped: #47 The Usual Suspects\n",
" Scraped: #48 The Intouchables\n",
" Scraped: #49 Alien\n",
" Scraped: #50 Modern Times\n",
" Scraped: #1 The Shawshank Redemption\n",
" Scraped: #2 The Godfather\n",
" Scraped: #3 The Dark Knight\n",
" Scraped: #4 The Godfather Part II\n",
" Scraped: #5 12 Angry Men\n",
" Scraped: #6 The Lord of the Rings: The Return of the King\n",
" Scraped: #7 Schindler's List\n",
" Scraped: #8 Pulp Fiction\n",
" Scraped: #9 The Lord of the Rings: The Fellowship of the Ring\n",
" Scraped: #10 The Good, the Bad and the Ugly\n",
" Scraped: #11 Forrest Gump\n",
" Scraped: #12 The Lord of the Rings: The Two Towers\n",
" Scraped: #13 Fight Club\n",
" Scraped: #14 Inception\n",
" Scraped: #15 Star Wars: Episode V - The Empire Strikes Back\n",
" Scraped: #16 The Matrix\n",
" Scraped: #17 Goodfellas\n",
" Scraped: #18 Interstellar\n",
" Scraped: #19 One Flew Over the Cuckoo's Nest\n",
" Scraped: #20 Se7en\n",
" Scraped: #21 It's a Wonderful Life\n",
" Scraped: #22 The Silence of the Lambs\n",
" Scraped: #23 Seven Samurai\n",
" Scraped: #24 Saving Private Ryan\n",
" Scraped: #25 The Green Mile\n",
" Scraped: #26 City of God\n",
" Scraped: #27 Life Is Beautiful\n",
" Scraped: #28 Terminator 2: Judgment Day\n",
" Scraped: #29 Star Wars: Episode IV - A New Hope\n",
" Scraped: #30 Back to the Future\n",
" Scraped: #31 Spirited Away\n",
" Scraped: #32 The Pianist\n",
" Scraped: #33 Gladiator\n",
" Scraped: #34 Parasite\n",
" Scraped: #35 Psycho\n",
" Scraped: #36 The Lion King\n",
" Scraped: #37 Grave of the Fireflies\n",
" Scraped: #38 The Departed\n",
" Scraped: #39 Whiplash\n",
" Scraped: #40 Harakiri\n",
" Scraped: #41 The Prestige\n",
" Scraped: #42 American History X\n",
" Scraped: #43 Léon: The Professional\n",
" Scraped: #44 Spider-Man: Across the Spider-Verse\n",
" Scraped: #45 Casablanca\n",
" Scraped: #46 Cinema Paradiso\n",
" Scraped: #47 The Usual Suspects\n",
" Scraped: #48 The Intouchables\n",
" Scraped: #49 Alien\n",
" Scraped: #50 Modern Times\n",
"\n",
"Scraping completado. Se extrajeron datos de 50 películas.\n"
]
Expand All @@ -212,15 +212,15 @@
"movie_item_selector = \"li.ipc-metadata-list-summary-item\"\n",
"\n",
"# Pista: Usa el método 'find_elements' del driver para obtener una lista de todos los elementos que coincidan con 'movie_item_selector'.\n",
"movie_elements = # [...COMPLETA AQUÍ...]\n",
"movie_elements = driver.find_elements(By.CSS_SELECTOR, movie_item_selector)\n",
"\n",
"# Iteramos solo sobre las primeras 50 películas\n",
"for movie in movie_elements[:50]:\n",
" try:\n",
" # --- Rango y Título ---\n",
" # Pista: Primero, encuentra el elemento h3 con la clase 'ipc-title__text'. Luego, obtén su '.text'.\n",
" title_element = movie.find_element(By.CSS_SELECTOR, \"h3.ipc-title__text\")\n",
" full_title_text = # [...COMPLETA AQUÍ...]\n",
"        title_element = movie.find_element(By.CSS_SELECTOR, \"h3.ipc-title__text\")  # Localiza el <h3> que contiene el rango y el título\n",
"        full_title_text = title_element.text  # Texto visible del elemento, p. ej. '1. The Shawshank Redemption'\n",
" rank, title = full_title_text.split('. ', 1)\n",
"\n",
" # --- Año --- \n",
Expand All @@ -233,15 +233,15 @@
" # --- URL de la película ---\n",
" # Pista: El enlace está en el atributo 'href' de la etiqueta <a>. Usa '.get_attribute()'\n",
" url_element = movie.find_element(By.CSS_SELECTOR, \"a.ipc-title-link-wrapper\")\n",
" movie_url = # [...COMPLETA AQUÍ...]\n",
" movie_url = url_element.get_attribute('href')\n",
"\n",
" # Asegúrate de que los nombres de las variables coincidan con las que creaste arriba.\n",
" movies_data.append({\n",
" \"Rango\": # [...COMPLETA AQUÍ...],\n",
" \"Titulo\": # [...COMPLETA AQUÍ...],\n",
" \"Rango\": rank,\n",
" \"Titulo\": title,\n",
" \"Año\": year,\n",
" \"Calificacion_IMDb\": rating,\n",
" \"URL\": # [...COMPLETA AQUÍ...]\n",
" \"URL\": movie_url\n",
" })\n",
" print(f\" Scraped: #{rank} {title}\")\n",
"\n",
Expand All @@ -264,7 +264,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 18,
"id": "318ab0d9",
"metadata": {},
"outputs": [
Expand All @@ -273,7 +273,7 @@
"output_type": "stream",
"text": [
"\n",
"🎉 Datos guardados exitosamente en 'imdb_top_50_peliculas.csv'\n"
" Datos guardados exitosamente.\n"
]
},
{
Expand Down Expand Up @@ -373,16 +373,16 @@
"output_type": "stream",
"text": [
"\n",
"Navegador cerrado correctamente.\n"
"Navegador cerrado correctamente. ¡Ejercicio terminado!\n"
]
}
],
"source": [
"if movies_data:\n",
" # Pista: Llama a pd.DataFrame() y pásale la lista que contiene todos nuestros datos.\n",
" df = pd.DataFrame(# [...COMPLETA AQUÍ...])\n",
" df = pd.DataFrame(movies_data)\n",
" # Pista: Usa el método '.to_csv()'. Dale un nombre de archivo, por ejemplo, \"imdb_top_50.csv\", y no te olvides de poner index=False.\n",
" # [...COMPLETA AQUÍ...]\n",
" df.to_csv(\"imdb_top_50.csv\", index = False)\n",
" \n",
" print(\"\\n Datos guardados exitosamente.\")\n",
" display(df.head())\n",
Expand All @@ -391,7 +391,7 @@
"\n",
"# Cerrar el navegador\n",
"# Pista: Hay un método en el objeto 'driver' para cerrar todas las ventanas y terminar la sesión.\n",
"# [...COMPLETA AQUÍ...]\n",
"driver.quit()\n",
"\n",
"print(\"\\nNavegador cerrado correctamente. ¡Ejercicio terminado!\")\n"
]
Expand Down