From 840c2a8b019e33bb25966d0c7977e50228fdc681 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 30 Sep 2021 18:20:46 -0300 Subject: [PATCH 01/13] =?UTF-8?q?Documentacao=20do=20procedimento=20scielo?= =?UTF-8?q?=5Fnetwork=20que=20faz=20o=20envio=20de=20dados=20de=20uma=20co?= =?UTF-8?q?le=C3=A7=C3=A3o=20para=20o=20n=C3=B3=20central?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- proc/scielo_network/README.md | 59 +++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 proc/scielo_network/README.md diff --git a/proc/scielo_network/README.md b/proc/scielo_network/README.md new file mode 100644 index 000000000..b79dcb113 --- /dev/null +++ b/proc/scielo_network/README.md @@ -0,0 +1,59 @@ +# scielo_network + +## Objetivos + +Enviar os metadados da coleção para o nó central para fornecer serviços no âmbito da rede SciELO. + +Os metadados são de periódicos, fascículos e documentos, e lista de arquivos PDF dos documentos. + + +## Motivação + +O envio de arquivos `.iso` atual apresenta muitos problemas, pois podem vir corrompidos. + +Ausência nos metadados a informação dos PDFs correspondentes aos documentos e, esta informação é necessária para identificar os idiomas disponíveis dos documentos, principalmente quando têm tradução. + + +## Origem dos metadados + +- `bases/title/title` +- `bases/artigo/artigo` +- `bases/pdf/*` + + +## Sobre os scripts + +A partir de `bases/pdf/*` e do comando `find`, é obtida uma lista de pdfs no padrão: + +``` +../bases/pdf//v17n1/v17n1a16-es.pdf +../bases/pdf//v17n1/v17n1a04-es.pdf +../bases/pdf//v17n1/v17n1a12-es.pdf +``` + +Esta lista é criada em um arquivo no diretório `temp/scielo_network`. E na sequência, é feita a transferência. + +A partir de `bases/title/title`, gera um arquivo do tipo `*.id` com o utilitário `cisis/i2id` no diretório temporário `temp/scielo_network` e, na sequência, é feita a transferência. 
+ +A partir de `scielo_network_in.txt`, obtido do ftp cadastrado e que contém a lista de PID + data de atualização do registro da base ISIS, é gerada uma lista com os documentos novos e/ou atualizados consultando `bases-work/artigo/artigo`. A partir da lista que contém os itens novos ou atualizados, um arquivo `artigo_*.id` é gerado no diretório temporário `temp/scielo_network` para cada documento usando o utilitário `cisis/i2id` e sua transferência é feita. + + +As credenciais das transferências podem ser obtidas de um dos arquivos: + +- transf/Envia2MedlineLogOn.txt +- transf/Envia2SciELOFastLogOn.txt +- transf/Envia2SciELONetworkLogOn.txt + +Cada transferência envia o arquivo em si, seu arquivo compactado e um arquivo `scielo_network_time.log` contendo acúmulo de data, hora e eventos ocorridos. + +São executadas em concorrência as operações: + +- geração dos arquivos `scielo_network_artigo_*.id` +- geração de `scielo_network_pdfs_list.txt` +- geração de `scielo_network_title.id` +- geração dos arquivos `scielo_network_i_*.id` + + +# Script proc/Envia2SciELONetworkPadrao.bat + +Chama `scielo_network/main_generate_and_transfer_new_and_updated.bat` com os parâmetros adequados. From 86045d2189c8e88bc5e505e8f43e422db9e189aa Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 30 Sep 2021 18:22:14 -0300 Subject: [PATCH 02/13] Registra os eventos em arquivo de log --- proc/scielo_network/InformaLog.bat | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100755 proc/scielo_network/InformaLog.bat diff --git a/proc/scielo_network/InformaLog.bat b/proc/scielo_network/InformaLog.bat new file mode 100755 index 000000000..e6c4f3d21 --- /dev/null +++ b/proc/scielo_network/InformaLog.bat @@ -0,0 +1,28 @@ +export PATH=$PATH:. 
+ +rem scielo_network/InformaLog +rem Parametro 1: log file +rem Parametro 2: nome do batch + +rem Inicializa variaveis +LOG_FILE=$1 +SCRIPT_NAME=$2 +PARAM_3=$3 +PARAM_4=$4 +PARAM_5=$5 +PARAM_6=$6 +PARAM_7=$7 +PARAM_8=$8 +PARAM_9=$9 + +rem Verifica parametros +call batch/VerifPresencaParametro.bat $0 @${SCRIPT_NAME} "nome do batch" +call batch/VerifPresencaParametro.bat $0 @${LOG_FILE} "log file" + + +echo `date '+%Y%m%d %H:%M:%S'` $2 $3 $4 $5 $6 $7 $8 $9 >> ${TIME_LOG} + +echo `date '+%Y%m%d %H:%M:%S'` >> ${LOG_FILE} +echo [${SCRIPT_NAME}] >> ${LOG_FILE} +echo ${PARAM_3} ${PARAM_4} ${PARAM_5} ${PARAM_6} ${PARAM_7} ${PARAM_8} ${PARAM_9} >> ${LOG_FILE} +echo >> ${LOG_FILE} From 32dd307b097054a004acff7b6562c456b251871b Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 30 Sep 2021 18:23:18 -0300 Subject: [PATCH 03/13] Cria script que transfere arquivos com get e put --- proc/scielo_network/transf.bat | 84 ++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100755 proc/scielo_network/transf.bat diff --git a/proc/scielo_network/transf.bat b/proc/scielo_network/transf.bat new file mode 100755 index 000000000..73f5a1013 --- /dev/null +++ b/proc/scielo_network/transf.bat @@ -0,0 +1,84 @@ +export PATH=$PATH:. 
+ +rem scielo_network/transf +rem Parametro 1: log file +rem Parametro 2: temp path +rem Parametro 3: file to transfer +rem Parametro 4: bin or asc + +rem Inicializa variaveis +LOGFILE=$1 +TMP_PATH=$2 +FILE_TO_TRANSFER=$3 +FTP_BIN_OR_ASC=$4 +GET_OR_PUT=$5 + +if [ "@${GET_OR_PUT}" == "@" ] +then + GET_OR_PUT=put +fi + +rem Inicio +call scielo_network/InformaLog.bat ${LOGFILE} $0 "BEGIN" +call scielo_network/InformaLog.bat ${LOGFILE} $0 $1 $2 $3 $4 $5 $6 $7 $8 $9 + +rem Verifica parametros +call batch/VerifPresencaParametro.bat $0 @${LOGFILE} "log file" +call batch/VerifPresencaParametro.bat $0 @${TMP_PATH} "tmp path" +call batch/VerifPresencaParametro.bat $0 @${FILE_TO_TRANSFER} "file to transfer" +call batch/VerifPresencaParametro.bat $0 @${FTP_BIN_OR_ASC} "bin or asc" + +call batch/VerifExisteArquivo.bat ${TMP_PATH}/${FILE_TO_TRANSFER} + +rem Create ftp instructions file +FTP_INSTR=${TMP_PATH}/ftp_instructions_${FILE_TO_TRANSFER}.txt +if [ ! -f ${FTP_INSTR} ] +then + call scielo_network/InformaLog.bat ${LOGFILE} $0 "Create ${FTP_INSTR}" + + FTP_HEAD_FILE_PATH=${TMP_PATH}/ftp_head + + if [ ! 
-f ${FTP_HEAD_FILE_PATH} ] + then + FTP_INSTR_ALT1=transf/Envia2MedlineLogOn.txt + FTP_INSTR_ALT2=transf/Envia2SciELOFastLogOn.txt + FTP_INSTR_ALT3=transf/Envia2SciELONetworkLogOn.txt + if [ -f ${FTP_INSTR_ALT1} ] + then + head -n 3 ${FTP_INSTR_ALT1} > ${FTP_HEAD_FILE_PATH} + fi + if [ -f ${FTP_INSTR_ALT2} ] + then + head -n 3 ${FTP_INSTR_ALT2} > ${FTP_HEAD_FILE_PATH} + fi + if [ -f ${FTP_INSTR_ALT3} ] + then + head -n 3 ${FTP_INSTR_ALT3} > ${FTP_HEAD_FILE_PATH} + fi + fi + + call batch/VerifExisteArquivo.bat ${FTP_HEAD_FILE_PATH} + + cat ${FTP_HEAD_FILE_PATH} > ${FTP_INSTR} + echo "${FTP_BIN_OR_ASC}" >> ${FTP_INSTR} + echo "lcd ${TMP_PATH}" >> ${FTP_INSTR} + echo "${GET_OR_PUT} ${FILE_TO_TRANSFER}" >> ${FTP_INSTR} + if [ "put" == "${GET_OR_PUT}" -a "@"!="@${TIME_LOG}" -a -f ${TIME_LOG} ] + then + echo "put `basename ${TIME_LOG}`" >> ${FTP_INSTR} + fi + echo "close" >> ${FTP_INSTR} + echo "bye" >> ${FTP_INSTR} +fi +call batch/VerifExisteArquivo.bat ${FTP_INSTR} + +ftp -n < ${FTP_INSTR} >> ${LOGFILE} + +rem Delete ftp instruction file +call batch/DeletaArquivo.bat ${FTP_INSTR} + +rem Register errors +call batch/ifErrorLevel.bat $? batch/AchouErro.bat $0 ftp: ${LOGFILE} + +call scielo_network/InformaLog.bat ${LOGFILE} $0 "FINISHED" +call scielo_network/InformaLog.bat ${LOGFILE} $0 "LOGFILE: ${LOGFILE}" From f12f9b1712c8604f2c852389db85c717a5dd6493 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 30 Sep 2021 18:24:28 -0300 Subject: [PATCH 04/13] Cria um script para obter a listagem de pdfs do site --- proc/scielo_network/pdfs_list.bat | 42 +++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100755 proc/scielo_network/pdfs_list.bat diff --git a/proc/scielo_network/pdfs_list.bat b/proc/scielo_network/pdfs_list.bat new file mode 100755 index 000000000..c3b766f36 --- /dev/null +++ b/proc/scielo_network/pdfs_list.bat @@ -0,0 +1,42 @@ +export PATH=$PATH:. 
+ +rem scielo_network/pdfs_list +rem Parametro 1: temp path +rem Parametro 2: caminho da pasta bases/pdf +rem Parametro 3: somente nome do arquivo da lista de pdfs + +rem Inicializa variaveis +TMP_PATH=$1 +BASES_PDF_PATH=$2 +PDF_LIST_NAME=$3 + +rem Log file of this script, kept in the temp path +LOGFILE=${TMP_PATH}/log_pdfs_list.log + +call scielo_network/InformaLog.bat ${LOGFILE} $0 "BEGIN" +call scielo_network/InformaLog.bat ${LOGFILE} $0 $1 $2 $3 $4 $5 $6 $7 $8 $9 + +rem Verifica parametros +call batch/VerifPresencaParametro.bat $0 @${TMP_PATH} "temp path" +call batch/VerifPresencaParametro.bat $0 @${BASES_PDF_PATH} "caminho da pasta bases/pdf" +call batch/VerifPresencaParametro.bat $0 @${PDF_LIST_NAME} "somente nome do arquivo da lista de pdfs" + + +rem Generates PDF list +call scielo_network/InformaLog.bat ${LOGFILE} $0 "Generates PDF list ${TMP_PATH}/${PDF_LIST_NAME}" + +find ${BASES_PDF_PATH} -name "*.pdf" > ${TMP_PATH}/${PDF_LIST_NAME} + +rem Transfer +call scielo_network/transf.bat ${LOGFILE} ${TMP_PATH} ${PDF_LIST_NAME} bin + +rem Delete pids_list.* +call batch/DeletaArquivo.bat ${TMP_PATH}/${PDF_LIST_NAME} +call batch/DeletaArquivo.bat ${TMP_PATH}/${PDF_LIST_NAME}.tgz + +rem Register errors +call batch/ifErrorLevel.bat $? 
batch/AchouErro.bat $0 ftp: ${LOGFILE} + +call scielo_network/InformaLog.bat ${LOGFILE} $0 "FINISHED" +call scielo_network/InformaLog.bat ${LOGFILE} $0 "LOGFILE: ${LOGFILE}" + From 19267ba9af7a4a37a233800abff4743d799a95ca Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 30 Sep 2021 18:26:07 -0300 Subject: [PATCH 05/13] =?UTF-8?q?Cria=20arquivo=20id=20e=20faz=20a=20sua?= =?UTF-8?q?=20transfer=C3=AAncia?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- proc/scielo_network/id_generate.bat | 56 +++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100755 proc/scielo_network/id_generate.bat diff --git a/proc/scielo_network/id_generate.bat b/proc/scielo_network/id_generate.bat new file mode 100755 index 000000000..2b7e41b8e --- /dev/null +++ b/proc/scielo_network/id_generate.bat @@ -0,0 +1,56 @@ +export PATH=$PATH:. + +rem scielo_network/id_generate +rem Parametro 1: temp path +rem Parametro 2: isis database file without extension +rem Parametro 3: id filename + +rem Inicializa variaveis +TMP_PATH=$1 +MST=$2 +ID_FILE=$3 + +LOGFILE=${TMP_PATH}/log_id_generate_${ID_FILE}.log +CISIS_DIR=cisis + +call scielo_network/InformaLog.bat ${LOGFILE} $0 "BEGIN" +call scielo_network/InformaLog.bat ${LOGFILE} $0 $1 $2 $3 $4 $5 $6 $7 $8 $9 + +rem Verifica parametros +call batch/VerifPresencaParametro.bat $0 @${TMP_PATH} "temp path" +call batch/VerifPresencaParametro.bat $0 @${MST} "isis database file without extension" +call batch/VerifPresencaParametro.bat $0 @${ID_FILE} "id filename" + +call batch/VerifExisteArquivo.bat ${MST}.mst + + +rem Generate id file +call scielo_network/InformaLog.bat ${LOGFILE} $0 "Create ${ID_FILE}" +${CISIS_DIR}/i2id ${MST} > ${TMP_PATH}/${ID_FILE} + +call batch/VerifExisteArquivo.bat ${TMP_PATH}/${ID_FILE} + +rem Transfer id file +call scielo_network/transf.bat ${LOGFILE} ${TMP_PATH} ${ID_FILE} bin + + +rem Generate tgz file +call scielo_network/InformaLog.bat ${LOGFILE} $0 
"Generate tgz file ${ID_FILE}" +back=`pwd` +cd ${TMP_PATH} +tar cvfzp ${ID_FILE}.tgz ${ID_FILE} +cd $back +rem Transfer tgz file +call scielo_network/transf.bat ${LOGFILE} ${TMP_PATH} ${ID_FILE}.tgz bin + + +rem Delete id file and id file tgz +call batch/DeletaArquivo.bat ${TMP_PATH}/${ID_FILE} +call batch/DeletaArquivo.bat ${TMP_PATH}/${ID_FILE}.tgz + + +rem Register errors +call batch/ifErrorLevel.bat $? batch/AchouErro.bat $0 ftp: ${LOGFILE} + +call scielo_network/InformaLog.bat ${LOGFILE} $0 "FINISHED" +call scielo_network/InformaLog.bat ${LOGFILE} $0 "LOGFILE: ${LOGFILE}" From 039763c7d12cf16517f2d208309acecf3f09c701 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 30 Sep 2021 18:27:36 -0300 Subject: [PATCH 06/13] Seleciona registros da base artigo e deles gera um arquivo id e o transfere --- .../generate_id_from_article.bat | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 proc/scielo_network/generate_id_from_article.bat diff --git a/proc/scielo_network/generate_id_from_article.bat b/proc/scielo_network/generate_id_from_article.bat new file mode 100644 index 000000000..991502a66 --- /dev/null +++ b/proc/scielo_network/generate_id_from_article.bat @@ -0,0 +1,43 @@ +export PATH=$PATH:. 
+ +rem scielo_network/generate_id_from_article +rem Parametro 1: temp path +rem Parametro 2: bases path +rem Parametro 3: key +rem Parametro 4: id filename + +rem Inicializa variaveis +TMP_PATH=$1 +BASES_PATH=$2 +KEY=$3 +ID_FILENAME=$4 + +rem Inicializa variaveis +LOGFILE=${TMP_PATH}/log_generate_id_from_article_${ID_FILENAME}.log + +CISIS_DIR=cisis +ARTICLE_DB=${BASES_PATH}/artigo/artigo + +call scielo_network/InformaLog.bat ${LOGFILE} $0 "BEGIN" +call scielo_network/InformaLog.bat ${LOGFILE} $0 $1 $2 $3 $4 $5 $6 $7 $8 $9 + +rem Verifica parametros +call batch/VerifPresencaParametro.bat $0 @${TMP_PATH} "temp path" +call batch/VerifPresencaParametro.bat $0 @${BASES_PATH} "bases path" +call batch/VerifPresencaParametro.bat $0 @${KEY} "key" +call batch/VerifPresencaParametro.bat $0 @${ID_FILENAME} "id filename" + +call batch/VerifExisteArquivo.bat ${ARTICLE_DB}.mst + +rem Generate and transfer scilista issue +call scielo_network/InformaLog.bat ${LOGFILE} $0 "In background generate and transfer ${ID_FILENAME}" +${CISIS_DIR}/mx null count=0 create=${TMP_PATH}/${ID_FILENAME} now -all +${CISIS_DIR}/mx ${ARTICLE_DB} "bool=${KEY}" append=${TMP_PATH}/${ID_FILENAME} now -all +call scielo_network/id_generate.bat ${TMP_PATH} ${TMP_PATH}/${ID_FILENAME} ${ID_FILENAME}.id + +call batch/DeletaArquivo.bat ${TMP_PATH}/${ID_FILENAME}.mst +call batch/DeletaArquivo.bat ${TMP_PATH}/${ID_FILENAME}.xrf + +call batch/ifErrorLevel.bat $? 
batch/AchouErro.bat $0 ftp: ${LOGFILE} +call scielo_network/InformaLog.bat ${LOGFILE} $0 "FINISHED" +call scielo_network/InformaLog.bat ${LOGFILE} $0 "LOGFILE: ${LOGFILE}" From 21d7fdcb2a78230c7783f1a3e97940f5a7d4f552 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 30 Sep 2021 18:29:17 -0300 Subject: [PATCH 07/13] =?UTF-8?q?Cria=20script=20que=20dispara=20a=20gera?= =?UTF-8?q?=C3=A7=C3=A3o=20e=20transfer=C3=AAncia=20de:?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - arquivos `scielo_network_artigo_*.id` - `scielo_network_pdfs_list.txt` - `scielo_network_title.id` - arquivos `scielo_network_i_*.id` --- ..._generate_and_transfer_new_and_updated.bat | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 proc/scielo_network/main_generate_and_transfer_new_and_updated.bat diff --git a/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat b/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat new file mode 100644 index 000000000..5a4e8b472 --- /dev/null +++ b/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat @@ -0,0 +1,95 @@ +export PATH=$PATH:. 
+export CIPAR=tabs/GIGA032.cip + +rem scielo_network/main_generate_and_transfer_new_and_updated +rem Parametro 1: bases path +rem Parametro 2: bases/pdf path + +rem Inicializa variaveis +BASES_PATH=$1 +BASES_PDF_PATH=$2 + + +rem Inicializa variaveis +CISIS_DIR=cisis +TMP_PATH=temp/scielo_network + +LOGFILE=${TMP_PATH}/log_main_generate_and_transfer_new_and_updated.log +export INFORMALOG=${TMP_PATH}/log_errors.log +export TIME_LOG=${TMP_PATH}/scielo_network_time_`date '+%d'`_new_and_updated.log + +ARTIGO=${BASES_PATH}/artigo/artigo +TITLE_MST=${BASES_PATH}/title/title +TITLE_ID=scielo_network_title.id +PDF_LIST=scielo_network_pdfs_list.txt + +echo > ${TIME_LOG} + +call scielo_network/InformaLog.bat ${LOGFILE} $0 "BEGIN" +call scielo_network/InformaLog.bat ${LOGFILE} $0 $1 $2 $3 $4 $5 $6 $7 $8 $9 + +rem Verifica parametros +call batch/VerifPresencaParametro.bat $0 @${BASES_PATH} "bases path" + +call scielo_network/InformaLog.bat ${LOGFILE} $0 "Create ${TMP_PATH}" +call batch/CriaDiretorio.bat ${TMP_PATH} + +rem Obtém scielo_network_in.txt +call scielo_network/transf.bat ${LOGFILE} ${TMP_PATH} scielo_network_in.txt bin get + +rem Usa scielo_network_in.txt como indicativo +rem de que a colecao tem permissao para enviar os dados +rem lista de pid concatenado com a data de atualizacao v91 +rem PID_YYYYMMDD +if [ -f ${TMP_PATH}/scielo_network_in.txt ] +then + + rem ########################################### + rem Generate and transfer scielo_network_artigo_* + rem ########################################### + rem Gera uma base isis SELECTED_DOCS e a indexa + cisis/mx seq=${TMP_PATH}/scielo_network_in.txt create=${TMP_PATH}/SELECTED_DOCS now -all + cisis/mx ${TMP_PATH}/SELECTED_DOCS "fst=1 0 v1/" fullinv=${TMP_PATH}/SELECTED_DOCS now -all + + rem Usa a base SELECTED_DOCS indexada para identificar os documentos que são novos ou desatualizados + cisis/mx ${ARTIGO} btell=0 tp=h lw=9999 "pft=if l(['${TMP_PATH}/SELECTED_DOCS']v880,'_',ref(mfn-1,v91)) = 0 then v880/ fi" now > 
${TMP_PATH}/NOT_scielo_network_in.txt + + rem Gera as bases e arquivos id dos documentos que são novos ou desatualizados + cisis/mx seq=${TMP_PATH}/NOT_scielo_network_in.txt lw=9999 "pft=if p(v1) then './scielo_network/generate_id_from_article.bat ${TMP_PATH} ${BASES_PATH} IV=',v1,'$ scielo_network_artigo_',v1/ fi" now > ${TMP_PATH}/NOT_IN_SCIELO_NETWORK.bat + chmod +x ${TMP_PATH}/NOT_IN_SCIELO_NETWORK.bat + nohup ${TMP_PATH}/NOT_IN_SCIELO_NETWORK.bat > ${TMP_PATH}/nohup.scielo_network_artigo.out& + call scielo_network/InformaLog.bat ${LOGFILE} $0 "Check ${TMP_PATH}/nohup.scielo_network_artigo.out" + + + rem ########################################### + rem Generate and transfer scielo_network_pdfs_list.txt + rem ########################################### + call scielo_network/InformaLog.bat ${LOGFILE} $0 "In background generate and transfer ${PDF_LIST}" + nohup scielo_network/pdfs_list.bat ${TMP_PATH} ${BASES_PDF_PATH} ${PDF_LIST} > ${TMP_PATH}/nohup.scielo_network_pdfs_list.out& + call scielo_network/InformaLog.bat ${LOGFILE} $0 "Check ${TMP_PATH}/nohup.scielo_network_pdfs_list.out" + + + rem ########################################### + rem Generate and transfer scielo_network_title.* + rem ########################################### + call scielo_network/InformaLog.bat ${LOGFILE} $0 "In background generate and transfer ${TITLE_ID}" + nohup scielo_network/id_generate.bat ${TMP_PATH} ${TITLE_MST} ${TITLE_ID} > ${TMP_PATH}/nohup.scielo_network_title.out& + call scielo_network/InformaLog.bat ${LOGFILE} $0 "Check ${TMP_PATH}/nohup.scielo_network_title.out" + + + rem ########################################### + rem Generate and transfer scielo_network_i_* + rem ########################################### + rem Gera as bases e arquivos id dos issues dos documentos que são novos ou desatualizados + cisis/mx seq=${TMP_PATH}/NOT_scielo_network_in.txt lw=9999 "pft=v1*1.17/" now | sort -u > ${TMP_PATH}/ISSUE_NOT_scielo_network_in.txt + cisis/mx 
seq=${TMP_PATH}/ISSUE_NOT_scielo_network_in.txt lw=9999 "pft=if p(v1) then './scielo_network/generate_id_from_article.bat ${TMP_PATH} ${BASES_PATH} Y',v1,'$ scielo_network_i_',v1/ fi" now > ${TMP_PATH}/ISSUE_NOT_IN_SCIELO_NETWORK.bat + chmod +x ${TMP_PATH}/ISSUE_NOT_IN_SCIELO_NETWORK.bat + nohup ${TMP_PATH}/ISSUE_NOT_IN_SCIELO_NETWORK.bat > ${TMP_PATH}/nohup.scielo_network_i.out& + call scielo_network/InformaLog.bat ${LOGFILE} $0 "Check ${TMP_PATH}/nohup.scielo_network_i.out" + +fi + + +call batch/ifErrorLevel.bat $? batch/AchouErro.bat $0 ftp: ${LOGFILE} +call scielo_network/InformaLog.bat ${LOGFILE} $0 "FINISHED" +call scielo_network/InformaLog.bat ${LOGFILE} $0 "LOGFILE: ${LOGFILE}" From 1414ffba1df71b5c827046db6d20b2259d7e35d6 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 30 Sep 2021 18:39:38 -0300 Subject: [PATCH 08/13] Cria script que chama `./scielo_network/main_generate_and_transfer_new_and_updated.bat` parametrizado --- proc/Envia2SciELONetworkPadrao.bat | 4 ++++ 1 file changed, 4 insertions(+) create mode 100755 proc/Envia2SciELONetworkPadrao.bat diff --git a/proc/Envia2SciELONetworkPadrao.bat b/proc/Envia2SciELONetworkPadrao.bat new file mode 100755 index 000000000..d3b7d53e5 --- /dev/null +++ b/proc/Envia2SciELONetworkPadrao.bat @@ -0,0 +1,4 @@ +export PATH=$PATH:. + + +./scielo_network/main_generate_and_transfer_new_and_updated.bat ../bases ../bases/pdf From f0489a1ca540069940ad48bb94061d1844c278d5 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 30 Sep 2021 18:57:02 -0300 Subject: [PATCH 09/13] Chama `./scielo_network/main_generate_and_transfer_new_and_updated.bat` em `GeraScielo.bat` --- proc/GeraPadrao.bat | 2 +- proc/GeraScielo.bat | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/proc/GeraPadrao.bat b/proc/GeraPadrao.bat index 09254871b..524aee7ba 100644 --- a/proc/GeraPadrao.bat +++ b/proc/GeraPadrao.bat @@ -11,4 +11,4 @@ echo GeraScielo.bat .. 
/scielo/web log/GeraPadrao.log adiciona echo echo Tecle CONTROL-C para sair ou ENTER para continuar... -GeraScielo.bat .. .. log/GeraPadrao.log adiciona +GeraScielo.bat .. .. log/GeraPadrao.log adiciona \ No newline at end of file diff --git a/proc/GeraScielo.bat b/proc/GeraScielo.bat index 464249012..f1c6758b9 100644 --- a/proc/GeraScielo.bat +++ b/proc/GeraScielo.bat @@ -79,3 +79,6 @@ call batch/CopiaWork2Teste.bat ../bases-work ../bases call batch/ManutencaoOff.bat ../bases call batch/InformaLog.bat $0 dh ===Fim=== LOG gravado em: $INFORMALOG + + +./scielo_network/main_generate_and_transfer_new_and_updated.bat $2/bases $2/bases/pdf \ No newline at end of file From 18d6dc372d4aa3695887ebce30eb47ce1c7128b7 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 30 Sep 2021 19:00:47 -0300 Subject: [PATCH 10/13] =?UTF-8?q?Aplica=20permiss=C3=A3o=20de=20execu?= =?UTF-8?q?=C3=A7=C3=A3o?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- proc/scielo_network/generate_id_from_article.bat | 0 .../scielo_network/main_generate_and_transfer_new_and_updated.bat | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 proc/scielo_network/generate_id_from_article.bat mode change 100644 => 100755 proc/scielo_network/main_generate_and_transfer_new_and_updated.bat diff --git a/proc/scielo_network/generate_id_from_article.bat b/proc/scielo_network/generate_id_from_article.bat old mode 100644 new mode 100755 diff --git a/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat b/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat old mode 100644 new mode 100755 From f720a365d2b23d19b6085b73dde4a877fb1f442c Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 30 Sep 2021 19:10:36 -0300 Subject: [PATCH 11/13] =?UTF-8?q?Cria=20o=20diret=C3=B3rio=20temporario=20?= =?UTF-8?q?antes=20de=20tudo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
.../main_generate_and_transfer_new_and_updated.bat | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat b/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat index 5a4e8b472..3c8da6f44 100755 --- a/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat +++ b/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat @@ -23,6 +23,7 @@ TITLE_MST=${BASES_PATH}/title/title TITLE_ID=scielo_network_title.id PDF_LIST=scielo_network_pdfs_list.txt +call batch/CriaDiretorio.bat ${TMP_PATH} echo > ${TIME_LOG} call scielo_network/InformaLog.bat ${LOGFILE} $0 "BEGIN" @@ -31,8 +32,6 @@ call scielo_network/InformaLog.bat ${LOGFILE} $0 $1 $2 $3 $4 $5 $6 $7 $8 $9 rem Verifica parametros call batch/VerifPresencaParametro.bat $0 @${BASES_PATH} "bases path" -call scielo_network/InformaLog.bat ${LOGFILE} $0 "Create ${TMP_PATH}" -call batch/CriaDiretorio.bat ${TMP_PATH} rem Obtém scielo_network_in.txt call scielo_network/transf.bat ${LOGFILE} ${TMP_PATH} scielo_network_in.txt bin get From cf02951dd580c6cfe04f7fde8c60ad0403fc1b21 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Fri, 1 Oct 2021 08:56:14 -0300 Subject: [PATCH 12/13] =?UTF-8?q?Cria=20script=20para=20gerar=20um=20arqui?= =?UTF-8?q?vo=20que=20cont=C3=A9m=20data=20de=20atualiza=C3=A7=C3=A3o=20do?= =?UTF-8?q?=20registro=20e=20PID=20=20e=20o=20transfere?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- proc/scielo_network/README.md | 7 +++- proc/scielo_network/generate_status.bat | 40 +++++++++++++++++++ ..._generate_and_transfer_new_and_updated.bat | 15 ++++++- 3 files changed, 58 insertions(+), 4 deletions(-) create mode 100755 proc/scielo_network/generate_status.bat diff --git a/proc/scielo_network/README.md b/proc/scielo_network/README.md index b79dcb113..8f44ed997 100644 --- a/proc/scielo_network/README.md +++ b/proc/scielo_network/README.md @@ -33,9 +33,11 
@@ A partir de `bases/pdf/*` e do comando `find`, é obtida uma lista de pdfs no pa Esta lista é criada em um arquivo no diretório `temp/scielo_network`. E na sequência, é feita a transferência. -A partir de `bases/title/title`, gera um arquivo do tipo `*.id` com o utilitário `cisis/i2id` no diretório temporário `temp/scielo_network` e, na sequência, é feita a transferência. +A partir de `bases/title/title`, gera um arquivo do tipo `*.id` com o utilitário `cisis/i2id` no diretório temporário `temp/scielo_network` e, na sequência, é feita sua transferência. -A partir de `scielo_network_in.txt`, obtido do ftp cadastrado e que contém a lista de PID + data de atualização do registro da base ISIS, é gerada uma lista com os documentos novos e/ou atualizados consultando `bases-work/artigo/artigo`. A partir da lista que contém os itens novos ou atualizados, um arquivo `artigo_*.id` é gerado no diretório temporário `temp/scielo_network` para cada documento usando o utilitário `cisis/i2id` e sua transferência é feita. +A partir de `scielo_network_in.txt`, obtido do ftp cadastrado e que contém a lista de PID + data de atualização do registro da base ISIS, é gerada uma lista com os documentos novos e/ou atualizados consultando `bases-work/artigo/artigo`. A partir da lista que contém os itens novos ou atualizados, um arquivo `artigo_*.id` é gerado no diretório temporário `temp/scielo_network` para cada documento usando o utilitário `cisis/i2id` e é feita sua transferência. + +A partir de `bases-work/artigo/artigo`, gera o arquivo `scielo_network_status.txt` contendo a lista de data de atualização do registro e PID de todos os documentos e é feita sua transferência. 
As credenciais das transferências podem ser obtidas de um dos arquivos: @@ -52,6 +54,7 @@ São executadas em concorrência as operações: - geração de `scielo_network_pdfs_list.txt` - geração de `scielo_network_title.id` - geração dos arquivos `scielo_network_i_*.id` +- geração dos arquivos `scielo_network_status.txt` # Script proc/Envia2SciELONetworkPadrao.bat diff --git a/proc/scielo_network/generate_status.bat b/proc/scielo_network/generate_status.bat new file mode 100755 index 000000000..756a08662 --- /dev/null +++ b/proc/scielo_network/generate_status.bat @@ -0,0 +1,40 @@ +export PATH=$PATH:. + +rem scielo_network/generate_status +rem Parametro 1: temp path +rem Parametro 2: bases path +rem Output file: scielo_network_status.txt in temp path, sent by scielo_network/transf.bat +rem Output line: update date v91 of record mfn-1, comma, PID v880, sorted + +rem Inicializa variaveis +TMP_PATH=$1 +BASES_PATH=$2 + +OUTPUT_FILENAME=scielo_network_status.txt + +rem Inicializa variaveis +LOGFILE=${TMP_PATH}/log_generate_status_${OUTPUT_FILENAME}.log + +CISIS_DIR=cisis +ARTICLE_DB=${BASES_PATH}/artigo/artigo + +call scielo_network/InformaLog.bat ${LOGFILE} $0 "BEGIN" +call scielo_network/InformaLog.bat ${LOGFILE} $0 $1 $2 $3 $4 $5 $6 $7 $8 $9 + +rem Verifica parametros +call batch/VerifPresencaParametro.bat $0 @${TMP_PATH} "temp path" +call batch/VerifPresencaParametro.bat $0 @${BASES_PATH} "bases path" + +call batch/VerifExisteArquivo.bat ${ARTICLE_DB}.mst + +rem Generate the status file from the tp=h records of the article database +call scielo_network/InformaLog.bat ${LOGFILE} $0 "In background generate and transfer ${OUTPUT_FILENAME}" +${CISIS_DIR}/mx ${ARTICLE_DB} btell=0 "bool=tp=h" "pft=ref(mfn-1,v91),',',v880/" now | sort > ${TMP_PATH}/${OUTPUT_FILENAME} + + +rem Transfer output file +call scielo_network/transf.bat ${LOGFILE} ${TMP_PATH} ${OUTPUT_FILENAME} bin + +call batch/ifErrorLevel.bat $? 
batch/AchouErro.bat $0 ftp: ${LOGFILE} +call scielo_network/InformaLog.bat ${LOGFILE} $0 "FINISHED" +call scielo_network/InformaLog.bat ${LOGFILE} $0 "LOGFILE: ${LOGFILE}" diff --git a/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat b/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat index 3c8da6f44..462974d7a 100755 --- a/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat +++ b/proc/scielo_network/main_generate_and_transfer_new_and_updated.bat @@ -47,11 +47,12 @@ then rem Generate and transfer scielo_network_artigo_* rem ########################################### rem Gera uma base isis SELECTED_DOCS e a indexa + call scielo_network/InformaLog.bat ${LOGFILE} $0 "In background generate and transfer scielo_network_artigo_*" cisis/mx seq=${TMP_PATH}/scielo_network_in.txt create=${TMP_PATH}/SELECTED_DOCS now -all cisis/mx ${TMP_PATH}/SELECTED_DOCS "fst=1 0 v1/" fullinv=${TMP_PATH}/SELECTED_DOCS now -all rem Usa a base SELECTED_DOCS indexada para identificar os documentos que são novos ou desatualizados - cisis/mx ${ARTIGO} btell=0 tp=h lw=9999 "pft=if l(['${TMP_PATH}/SELECTED_DOCS']v880,'_',ref(mfn-1,v91)) = 0 then v880/ fi" now > ${TMP_PATH}/NOT_scielo_network_in.txt + cisis/mx ${ARTIGO} btell=0 tp=h lw=9999 "pft=if l(['${TMP_PATH}/SELECTED_DOCS']s(ref(mfn-1,v91),',',v880)) = 0 then v880/ fi" now > ${TMP_PATH}/NOT_scielo_network_in.txt rem Gera as bases e arquivos id dos documentos que são novos ou desatualizados cisis/mx seq=${TMP_PATH}/NOT_scielo_network_in.txt lw=9999 "pft=if p(v1) then './scielo_network/generate_id_from_article.bat ${TMP_PATH} ${BASES_PATH} IV=',v1,'$ scielo_network_artigo_',v1/ fi" now > ${TMP_PATH}/NOT_IN_SCIELO_NETWORK.bat @@ -71,7 +72,7 @@ then rem ########################################### rem Generate and transfer scielo_network_title.* rem ########################################### - call scielo_network/InformaLog.bat ${LOGFILE} $0 "In background generate and transfer ${TITLE_ID}" + 
call scielo_network/InformaLog.bat ${LOGFILE} $0 "In background generate and transfer scielo_network_title.*" nohup scielo_network/id_generate.bat ${TMP_PATH} ${TITLE_MST} ${TITLE_ID} > ${TMP_PATH}/nohup.scielo_network_title.out& call scielo_network/InformaLog.bat ${LOGFILE} $0 "Check ${TMP_PATH}/nohup.scielo_network_title.out" @@ -80,12 +81,22 @@ then rem Generate and transfer scielo_network_i_* rem ########################################### rem Gera as bases e arquivos id dos issues dos documentos que são novos ou desatualizados + call scielo_network/InformaLog.bat ${LOGFILE} $0 "In background generate and transfer scielo_network_i_*" cisis/mx seq=${TMP_PATH}/NOT_scielo_network_in.txt lw=9999 "pft=v1*1.17/" now | sort -u > ${TMP_PATH}/ISSUE_NOT_scielo_network_in.txt cisis/mx seq=${TMP_PATH}/ISSUE_NOT_scielo_network_in.txt lw=9999 "pft=if p(v1) then './scielo_network/generate_id_from_article.bat ${TMP_PATH} ${BASES_PATH} Y',v1,'$ scielo_network_i_',v1/ fi" now > ${TMP_PATH}/ISSUE_NOT_IN_SCIELO_NETWORK.bat chmod +x ${TMP_PATH}/ISSUE_NOT_IN_SCIELO_NETWORK.bat nohup ${TMP_PATH}/ISSUE_NOT_IN_SCIELO_NETWORK.bat > ${TMP_PATH}/nohup.scielo_network_i.out& call scielo_network/InformaLog.bat ${LOGFILE} $0 "Check ${TMP_PATH}/nohup.scielo_network_i.out" + + rem ########################################### + rem Generate and transfer scielo_network_status.txt + rem ########################################### + rem Gera arquivo texto que contém data de atualização do registro e PID de cada documento + call scielo_network/InformaLog.bat ${LOGFILE} $0 "In background generate and transfer scielo_network_status.txt" + nohup ./scielo_network/generate_status.bat ${TMP_PATH} ${BASES_PATH} > ${TMP_PATH}/nohup.scielo_network_status.out& + call scielo_network/InformaLog.bat ${LOGFILE} $0 "Check ${TMP_PATH}/nohup.scielo_network_status.out" + fi From 05675a9c45db51ea3b3be46d2d923b05084be969 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Fri, 1 Oct 2021 09:45:06 -0300 Subject: [PATCH 
13/13] =?UTF-8?q?Na=20transfer=C3=AAncia,=20cria=20o=20tgz?= =?UTF-8?q?=20e=20e=20o=20envia.=20Tamb=C3=A9m=20apaga=20os=20arquivos=20t?= =?UTF-8?q?ransferidos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- proc/scielo_network/id_generate.bat | 15 --------------- proc/scielo_network/pdfs_list.bat | 4 ---- proc/scielo_network/transf.bat | 29 ++++++++++++++++++++++++++--- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/proc/scielo_network/id_generate.bat b/proc/scielo_network/id_generate.bat index 2b7e41b8e..a8d745881 100755 --- a/proc/scielo_network/id_generate.bat +++ b/proc/scielo_network/id_generate.bat @@ -34,21 +34,6 @@ rem Transfer id file call scielo_network/transf.bat ${LOGFILE} ${TMP_PATH} ${ID_FILE} bin -rem Generate tgz file -call scielo_network/InformaLog.bat ${LOGFILE} $0 "Generate tgz file ${ID_FILE}" -back=`pwd` -cd ${TMP_PATH} -tar cvfzp ${ID_FILE}.tgz ${ID_FILE} -cd $back -rem Transfer tgz file -call scielo_network/transf.bat ${LOGFILE} ${TMP_PATH} ${ID_FILE}.tgz bin - - -rem Delete id file and id file tgz -call batch/DeletaArquivo.bat ${TMP_PATH}/${ID_FILE} -call batch/DeletaArquivo.bat ${TMP_PATH}/${ID_FILE}.tgz - - rem Register errors call batch/ifErrorLevel.bat $? batch/AchouErro.bat $0 ftp: ${LOGFILE} diff --git a/proc/scielo_network/pdfs_list.bat b/proc/scielo_network/pdfs_list.bat index c3b766f36..847f30254 100755 --- a/proc/scielo_network/pdfs_list.bat +++ b/proc/scielo_network/pdfs_list.bat @@ -30,10 +30,6 @@ find ${BASES_PDF_PATH} -name "*.pdf" > ${TMP_PATH}/${PDF_LIST_NAME} rem Transfer call scielo_network/transf.bat ${LOGFILE} ${TMP_PATH} ${PDF_LIST_NAME} bin -rem Delete pids_list.* -call batch/DeletaArquivo.bat ${TMP_PATH}/${PDF_LIST_NAME} -call batch/DeletaArquivo.bat ${TMP_PATH}/${PDF_LIST_NAME}.tgz - rem Register errors call batch/ifErrorLevel.bat $? 
batch/AchouErro.bat $0 ftp: ${LOGFILE} diff --git a/proc/scielo_network/transf.bat b/proc/scielo_network/transf.bat index 73f5a1013..7efb0f5c3 100755 --- a/proc/scielo_network/transf.bat +++ b/proc/scielo_network/transf.bat @@ -30,6 +30,18 @@ call batch/VerifPresencaParametro.bat $0 @${FTP_BIN_OR_ASC} "bin or asc" call batch/VerifExisteArquivo.bat ${TMP_PATH}/${FILE_TO_TRANSFER} + +if [ "put" == "${GET_OR_PUT}" ] +then + rem Generate tgz file + call scielo_network/InformaLog.bat ${LOGFILE} $0 "Generate ${FILE_TO_TRANSFER}.tgz" + back=`pwd` + cd ${TMP_PATH} + tar cvfzp ${FILE_TO_TRANSFER}.tgz ${FILE_TO_TRANSFER} + cd $back +fi + + rem Create ftp instructions file FTP_INSTR=${TMP_PATH}/ftp_instructions_${FILE_TO_TRANSFER}.txt if [ ! -f ${FTP_INSTR} ] @@ -62,10 +74,15 @@ then cat ${FTP_HEAD_FILE_PATH} > ${FTP_INSTR} echo "${FTP_BIN_OR_ASC}" >> ${FTP_INSTR} echo "lcd ${TMP_PATH}" >> ${FTP_INSTR} - echo "${GET_OR_PUT} ${FILE_TO_TRANSFER}" >> ${FTP_INSTR} - if [ "put" == "${GET_OR_PUT}" -a "@"!="@${TIME_LOG}" -a -f ${TIME_LOG} ] + if [ "put" == "${GET_OR_PUT}" ] then - echo "put `basename ${TIME_LOG}`" >> ${FTP_INSTR} + echo "${GET_OR_PUT} ${FILE_TO_TRANSFER}.tgz" >> ${FTP_INSTR} + if [ "@"!="@${TIME_LOG}" -a -f ${TIME_LOG} ] + then + echo "put `basename ${TIME_LOG}`" >> ${FTP_INSTR} + fi + else + echo "${GET_OR_PUT} ${FILE_TO_TRANSFER}" >> ${FTP_INSTR} fi echo "close" >> ${FTP_INSTR} echo "bye" >> ${FTP_INSTR} @@ -77,6 +94,12 @@ ftp -n < ${FTP_INSTR} >> ${LOGFILE} rem Delete ftp instruction file call batch/DeletaArquivo.bat ${FTP_INSTR} +if [ "put" == "${GET_OR_PUT}" ] +then + call batch/DeletaArquivo.bat ${TMP_PATH}/${FILE_TO_TRANSFER} + call batch/DeletaArquivo.bat ${TMP_PATH}/${FILE_TO_TRANSFER}.tgz +fi + rem Register errors call batch/ifErrorLevel.bat $? batch/AchouErro.bat $0 ftp: ${LOGFILE}