diff --git a/servel_scraper/data_extractor/csv_writer.py b/servel_scraper/data_extractor/csv_writer.py index 89abd88..09b4a2f 100644 --- a/servel_scraper/data_extractor/csv_writer.py +++ b/servel_scraper/data_extractor/csv_writer.py @@ -3,7 +3,7 @@ from servel_scraper.data_extractor.person import Person -OUT_CSV_DELIMITER = ',' +OUT_CSV_DELIMITER = ';' OUT_CSV_QUOTE_CHAR = '"' @@ -16,7 +16,7 @@ def write_rows(self, people: List[Person]): self.write_row(p) def write_row(self, p: Person): - self._csv_writer.writerow([p.name, p.rut, str(p.gender), p.address]) + self._csv_writer.writerow([p.name, p.rut, str(p.gender), p.address, p.comuna]) class CSVExporter(object): diff --git a/servel_scraper/data_extractor/person.py b/servel_scraper/data_extractor/person.py index 9128b90..a4801f3 100644 --- a/servel_scraper/data_extractor/person.py +++ b/servel_scraper/data_extractor/person.py @@ -25,3 +25,4 @@ class Person: rut: str address: str gender: Gender + comuna: str diff --git a/servel_scraper/data_extractor/servel_pdf_stream_page.py b/servel_scraper/data_extractor/servel_pdf_stream_page.py index 9e4a64a..4d95bc0 100644 --- a/servel_scraper/data_extractor/servel_pdf_stream_page.py +++ b/servel_scraper/data_extractor/servel_pdf_stream_page.py @@ -27,6 +27,7 @@ RUT_FIELD_INDEX = 1 GENDER_FIELD_INDEX = 2 ADDRESS_FIELD_INDEX = 3 +COMUNA_FIELD_INDEX = 4 class ServelPDFStreamPage(object): @@ -109,7 +110,8 @@ def _convert_row_of_strings_to_person(cls, row: List[str]) -> Person: name=row[NAME_FIELD_INDEX], rut=row[RUT_FIELD_INDEX], gender=cls._str_gender_to_gender(row[GENDER_FIELD_INDEX]), - address=row[ADDRESS_FIELD_INDEX] + address=row[ADDRESS_FIELD_INDEX], + comuna=row[COMUNA_FIELD_INDEX] ) @staticmethod diff --git a/servel_scraper/data_extractor/test_extractor.py b/servel_scraper/data_extractor/test_extractor.py index 6ffbbf9..c48fb2d 100644 --- a/servel_scraper/data_extractor/test_extractor.py +++ b/servel_scraper/data_extractor/test_extractor.py @@ -16,8 +16,10 @@ def 
test_extract_with_valid_file__must_be_extract_data(self): self.assertEqual(persons[0].gender, Gender.MALE) self.assertEqual(persons[0].rut, '13.392.711-5') self.assertEqual(persons[0].address, 'ISLA REY JORGE SIN N') + self.assertEqual(persons[0].comuna, 'ANTARTICA') # Ultima persona self.assertEqual(persons[EXPECTED_SIZE - 1].name, 'ZUÑIGA MIRANDA MARIA ELENA') self.assertEqual(persons[EXPECTED_SIZE - 1].gender, Gender.FEMALE) self.assertEqual(persons[EXPECTED_SIZE - 1].rut, '11.660.676-3') self.assertEqual(persons[EXPECTED_SIZE - 1].address, 'BA E FREI') + self.assertEqual(persons[EXPECTED_SIZE - 1].comuna, 'ANTARTICA') diff --git a/servel_scraper/downloader/servel_file_repo.py b/servel_scraper/downloader/servel_file_repo.py index c338802..6a89b5e 100644 --- a/servel_scraper/downloader/servel_file_repo.py +++ b/servel_scraper/downloader/servel_file_repo.py @@ -7,7 +7,7 @@ from servel_scraper.downloader.file_downloader import FileDownloader from tqdm import tqdm -CDN_BASE_URL = 'http://cdn.servel.cl/padron' +CDN_BASE_URL = 'http://www.paltabi.cl/secure' _DEFAULT_CUT_COLUMNS_CSV: Dict[CUTField, str] = { CUTField.NOMBRE_REGION: 'Nombre Región', @@ -28,8 +28,8 @@ class ServerFilesUrlRepo(object): def get_servel_files_urls(cls, csv_path: str) -> List[str]: """ i.e: [ - "http://cdn.servel.cl/padron/A01107.pdf", - "http://cdn.servel.cl/padron/01101.pdf", + "http://www.paltabi.cl/secure/A01107.pdf", + "http://www.paltabi.cl/secure/01101.pdf", ... 
] """ @@ -48,7 +48,7 @@ def _get_cuts(csv_path: str) -> List[CUT]: @staticmethod def _build_url_from_cut(cut: CUT) -> Url: """ - i.e: "http://cdn.servel.cl/padron/A01107.pdf" + i.e: "http://www.paltabi.cl/secure/A01107.pdf" """ return f"{CDN_BASE_URL}/A{cut.codigo_comuna}.pdf" @@ -68,7 +68,7 @@ def download_servel_files(out_path: str, csv_path: str = DEFAULT_CUT_CSV_PATH) - result = DownloadResult(failed={}, success=[]) pbar = tqdm(urls) for url in pbar: - pbar.set_description(f"Downloading: {url}") + pbar.set_description(f"Descargando: {url}") try: file_downloader.download_file(out_path, url) except (BaseException, Exception) as e: