From 8c10b394c6ea9566209af628e95701dab395b4b5 Mon Sep 17 00:00:00 2001 From: attarazonad-stack Date: Wed, 20 Aug 2025 09:51:21 -0500 Subject: [PATCH] Add files via upload --- .../LAB2/Imbd_lab_students.ipynb | 146 +++++++++--------- 1 file changed, 73 insertions(+), 73 deletions(-) diff --git a/Labs/Python_Notebooks/LAB2/Imbd_lab_students.ipynb b/Labs/Python_Notebooks/LAB2/Imbd_lab_students.ipynb index 9359e62a..dbf7b06a 100644 --- a/Labs/Python_Notebooks/LAB2/Imbd_lab_students.ipynb +++ b/Labs/Python_Notebooks/LAB2/Imbd_lab_students.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 1, "id": "dc2d3ffc", "metadata": {}, "outputs": [], @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "b2aaba9b", "metadata": {}, "outputs": [ @@ -62,14 +62,14 @@ "source": [ "# Configuramos las opciones de Chrome\n", "chrome_options = Options()\n", - "# chrome_options.add_argument(\"--headless\")\n", + "#chrome_options.add_argument(\"--headless\")\n", "chrome_options.add_argument(\"--start-maximized\")\n", "chrome_options.add_argument(\"--lang=en-US\")\n", "\n", "# Iniciar el WebDriver de Chrome\n", "# Pista: Crea una variable llamada 'driver' y asígnale la instancia de webdriver.Chrome(),\n", "# pasando nuestras 'chrome_options' como argumento.\n", - "driver = # [...COMPLETA AQUÍ...]\n", + "driver = webdriver.Chrome(options=chrome_options)\n", "\n", "print(\"WebDriver iniciado con éxito.\")" ] @@ -86,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "a92ee23e", "metadata": {}, "outputs": [ @@ -95,7 +95,7 @@ "output_type": "stream", "text": [ "Esperando a que la lista de películas cargue...\n", - "Lista de películas encontrada. Comenzando el scraping.\n" + "Lista de películas encontrada. ¡A scrapear!\n" ] } ], @@ -104,7 +104,7 @@ "url = \"https://www.imdb.com/chart/top/\"\n", "\n", "# Pista: El objeto 'driver' tiene un método para abrir una URL. 
¿Cuál es?\n", - "# [...COMPLETA AQUÍ...]\n", + "driver.get(url)\n", "\n", "# Lista para guardar los datos de cada película\n", "movies_data = []\n", @@ -117,8 +117,8 @@ " \n", " # Completa la espera para que el script se detenga hasta que la lista de películas sea visible.\n", " # Pista: Usa EC.visibility_of_element_located() y pásale una tupla con el método de búsqueda (By) y el selector.\n", - " WebDriverWait(driver, 10).until(\n", - " EC.visibility_of_element_located(( # [...COMPLETA AQUÍ EL MÉTODO By Y EL SELECTOR...] ))\n", + " WebDriverWait(driver, 10).until( #Te permite demorarte\n", + " EC.visibility_of_element_located((By.CSS_SELECTOR, movie_list_selector))\n", " )\n", " print(\"Lista de películas encontrada. ¡A scrapear!\")\n", "\n", @@ -144,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "4d0d1eca", "metadata": {}, "outputs": [ @@ -152,56 +152,56 @@ "name": "stdout", "output_type": "stream", "text": [ - "✅ Scraped: #1 The Shawshank Redemption\n", - "✅ Scraped: #2 The Godfather\n", - "✅ Scraped: #3 The Dark Knight\n", - "✅ Scraped: #4 The Godfather Part II\n", - "✅ Scraped: #5 12 Angry Men\n", - "✅ Scraped: #6 The Lord of the Rings: The Return of the King\n", - "✅ Scraped: #7 Schindler's List\n", - "✅ Scraped: #8 Pulp Fiction\n", - "✅ Scraped: #9 The Lord of the Rings: The Fellowship of the Ring\n", - "✅ Scraped: #10 The Good, the Bad and the Ugly\n", - "✅ Scraped: #11 Forrest Gump\n", - "✅ Scraped: #12 The Lord of the Rings: The Two Towers\n", - "✅ Scraped: #13 Fight Club\n", - "✅ Scraped: #14 Inception\n", - "✅ Scraped: #15 Star Wars: Episode V - The Empire Strikes Back\n", - "✅ Scraped: #16 The Matrix\n", - "✅ Scraped: #17 Goodfellas\n", - "✅ Scraped: #18 Interstellar\n", - "✅ Scraped: #19 One Flew Over the Cuckoo's Nest\n", - "✅ Scraped: #20 Se7en\n", - "✅ Scraped: #21 It's a Wonderful Life\n", - "✅ Scraped: #22 The Silence of the Lambs\n", - "✅ Scraped: #23 Seven Samurai\n", - "✅ Scraped: #24 Saving 
Private Ryan\n", - "✅ Scraped: #25 The Green Mile\n", - "✅ Scraped: #26 City of God\n", - "✅ Scraped: #27 Life Is Beautiful\n", - "✅ Scraped: #28 Terminator 2: Judgment Day\n", - "✅ Scraped: #29 Star Wars: Episode IV - A New Hope\n", - "✅ Scraped: #30 Back to the Future\n", - "✅ Scraped: #31 Spirited Away\n", - "✅ Scraped: #32 The Pianist\n", - "✅ Scraped: #33 Gladiator\n", - "✅ Scraped: #34 Parasite\n", - "✅ Scraped: #35 Psycho\n", - "✅ Scraped: #36 The Lion King\n", - "✅ Scraped: #37 Grave of the Fireflies\n", - "✅ Scraped: #38 The Departed\n", - "✅ Scraped: #39 Whiplash\n", - "✅ Scraped: #40 Harakiri\n", - "✅ Scraped: #41 The Prestige\n", - "✅ Scraped: #42 American History X\n", - "✅ Scraped: #43 Léon: The Professional\n", - "✅ Scraped: #44 Spider-Man: Across the Spider-Verse\n", - "✅ Scraped: #45 Casablanca\n", - "✅ Scraped: #46 Cinema Paradiso\n", - "✅ Scraped: #47 The Usual Suspects\n", - "✅ Scraped: #48 The Intouchables\n", - "✅ Scraped: #49 Alien\n", - "✅ Scraped: #50 Modern Times\n", + " Scraped: #1 The Shawshank Redemption\n", + " Scraped: #2 The Godfather\n", + " Scraped: #3 The Dark Knight\n", + " Scraped: #4 The Godfather Part II\n", + " Scraped: #5 12 Angry Men\n", + " Scraped: #6 The Lord of the Rings: The Return of the King\n", + " Scraped: #7 Schindler's List\n", + " Scraped: #8 Pulp Fiction\n", + " Scraped: #9 The Lord of the Rings: The Fellowship of the Ring\n", + " Scraped: #10 The Good, the Bad and the Ugly\n", + " Scraped: #11 Forrest Gump\n", + " Scraped: #12 The Lord of the Rings: The Two Towers\n", + " Scraped: #13 Fight Club\n", + " Scraped: #14 Inception\n", + " Scraped: #15 Star Wars: Episode V - The Empire Strikes Back\n", + " Scraped: #16 The Matrix\n", + " Scraped: #17 Goodfellas\n", + " Scraped: #18 Interstellar\n", + " Scraped: #19 One Flew Over the Cuckoo's Nest\n", + " Scraped: #20 Se7en\n", + " Scraped: #21 It's a Wonderful Life\n", + " Scraped: #22 The Silence of the Lambs\n", + " Scraped: #23 Seven Samurai\n", + " Scraped: #24 
Saving Private Ryan\n", + " Scraped: #25 The Green Mile\n", + " Scraped: #26 City of God\n", + " Scraped: #27 Life Is Beautiful\n", + " Scraped: #28 Terminator 2: Judgment Day\n", + " Scraped: #29 Star Wars: Episode IV - A New Hope\n", + " Scraped: #30 Back to the Future\n", + " Scraped: #31 Spirited Away\n", + " Scraped: #32 The Pianist\n", + " Scraped: #33 Gladiator\n", + " Scraped: #34 Parasite\n", + " Scraped: #35 Psycho\n", + " Scraped: #36 The Lion King\n", + " Scraped: #37 Grave of the Fireflies\n", + " Scraped: #38 The Departed\n", + " Scraped: #39 Whiplash\n", + " Scraped: #40 Harakiri\n", + " Scraped: #41 The Prestige\n", + " Scraped: #42 American History X\n", + " Scraped: #43 Léon: The Professional\n", + " Scraped: #44 Spider-Man: Across the Spider-Verse\n", + " Scraped: #45 Casablanca\n", + " Scraped: #46 Cinema Paradiso\n", + " Scraped: #47 The Usual Suspects\n", + " Scraped: #48 The Intouchables\n", + " Scraped: #49 Alien\n", + " Scraped: #50 Modern Times\n", "\n", "Scraping completado. Se extrajeron datos de 50 películas.\n" ] @@ -212,15 +212,15 @@ "movie_item_selector = \"li.ipc-metadata-list-summary-item\"\n", "\n", "# Pista: Usa el método 'find_elements' del driver para obtener una lista de todos los elementos que coincidan con 'movie_item_selector'.\n", - "movie_elements = # [...COMPLETA AQUÍ...]\n", + "movie_elements = driver.find_elements(By.CSS_SELECTOR, movie_item_selector)\n", "\n", "# Iteramos solo sobre las primeras 50 películas\n", "for movie in movie_elements[:50]:\n", " try:\n", " # --- Rango y Título ---\n", " # Pista: Primero, encuentra el elemento h3 con la clase 'ipc-title__text'. 
Luego, obtén su '.text'.\n", - " title_element = movie.find_element(By.CSS_SELECTOR, \"h3.ipc-title__text\")\n", - " full_title_text = # [...COMPLETA AQUÍ...]\n", + " title_element = movie.find_element(By.CSS_SELECTOR, \"h3.ipc-title__text\") #BUSCAR SU ETIQUETA\n", + " full_title_text = title_element.text # Dame el texto de ese elemento\n", " rank, title = full_title_text.split('. ', 1)\n", "\n", " # --- Año --- \n", @@ -233,15 +233,15 @@ " # --- URL de la película ---\n", " # Pista: El enlace está en el atributo 'href' de la etiqueta <a>. Usa '.get_attribute()'\n", " url_element = movie.find_element(By.CSS_SELECTOR, \"a.ipc-title-link-wrapper\")\n", - " movie_url = # [...COMPLETA AQUÍ...]\n", + " movie_url = url_element.get_attribute('href')\n", "\n", " # Asegúrate de que los nombres de las variables coincidan con las que creaste arriba.\n", " movies_data.append({\n", - " \"Rango\": # [...COMPLETA AQUÍ...],\n", - " \"Titulo\": # [...COMPLETA AQUÍ...],\n", + " \"Rango\": rank,\n", + " \"Titulo\": title,\n", " \"Año\": year,\n", " \"Calificacion_IMDb\": rating,\n", - " \"URL\": # [...COMPLETA AQUÍ...]\n", + " \"URL\": movie_url\n", " })\n", " print(f\" Scraped: #{rank} {title}\")\n", "\n", @@ -264,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "318ab0d9", "metadata": {}, "outputs": [ @@ -273,7 +273,7 @@ "output_type": "stream", "text": [ "\n", - "🎉 Datos guardados exitosamente en 'imdb_top_50_peliculas.csv'\n" + " Datos guardados exitosamente.\n" ] }, { @@ -373,16 +373,16 @@ "output_type": "stream", "text": [ "\n", - "Navegador cerrado correctamente.\n" + "Navegador cerrado correctamente. ¡Ejercicio terminado!\n" ] } ], "source": [ "if movies_data:\n", " # Pista: Llama a pd.DataFrame() y pásale la lista que contiene todos nuestros datos.\n", - " df = pd.DataFrame(# [...COMPLETA AQUÍ...])\n", + " df = pd.DataFrame(movies_data)\n", " # Pista: Usa el método '.to_csv()'. 
Dale un nombre de archivo, por ejemplo, \"imdb_top_50.csv\", y no te olvides de poner index=False.\n", - " # [...COMPLETA AQUÍ...]\n", + " df.to_csv(\"imdb_top_50.csv\", index = False)\n", " \n", " print(\"\\n Datos guardados exitosamente.\")\n", " display(df.head())\n", @@ -391,7 +391,7 @@ "\n", "# Cerrar el navegador\n", "# Pista: Hay un método en el objeto 'driver' para cerrar todas las ventanas y terminar la sesión.\n", - "# [...COMPLETA AQUÍ...]\n", + "driver.quit()\n", "\n", "print(\"\\nNavegador cerrado correctamente. ¡Ejercicio terminado!\")\n" ]