Getting started¶
How to use koala from a client perspective.
Regular SIP workflow¶
Steps:
- Set up the metadata extraction tool.
- Extract metadata from file.
- Pack metadata file (mets.xml) and content into SIP archive file.
- Upload SIP via SFTP to koala.
- Check ingest process via koala ticket system.
Setup metadata extraction toolkit: FITS¶
To ensure long-term archivability, technical metadata must be extracted from each file before it is submitted.
root@host:~/getting-started# wget https://github.com/harvard-lts/fits/releases/download/1.5.0/fits-1.5.0.zip
root@host:~/getting-started# unzip fits-1.5.0.zip
Sample script to extract metadata from PDF and create SIP file¶
root@host:~/getting-started# cat mets-template.sh
#!/bin/bash
#
# mets-template.sh - build a SIP (Submission Information Package) from one file.
#
# Usage:
#   ./mets-template.sh [FILE]
#
# FILE defaults to the sample PDF used throughout this guide.
#
# Environment:
#   FITS_TOOL_PATH  path to the FITS launcher script
#                   (default: /root/getting-started/fits.sh)
#
# Result:
#   A new directory named after a freshly generated UUID containing
#     content/<file>      copy of the input file
#     mets.xml            METS document with the embedded FITS output
#     sip-<UUID>.zip      archive of the two entries above
#
# The __NAME__ variables are substituted verbatim into the XML template
# below, so their values must not contain XML special characters (&, <, ").
set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

FITS_TOOL_PATH="${FITS_TOOL_PATH:-/root/getting-started/fits.sh}"
source_file="${1:-littledorrit0000dick_o8l6.pdf}" # the file I want to archive

[[ -f "${source_file}" && -r "${source_file}" ]] \
  || die "input file not found or not readable: ${source_file}"
[[ -x "${FITS_TOOL_PATH}" ]] \
  || die "FITS launcher not found or not executable: ${FITS_TOOL_PATH}"

# Take a single timestamp so every date field in the METS file agrees.
now="$(date --iso-8601=seconds)"

# Only the base name is used inside the package (content/<name>).
__FILENAME__="${source_file##*/}"
__CREATE_DATE__="${now}"
__AGENT_NAME__="GWDG"
__AGENT_NOTE__="Automatisch generierte Metadaten"
__PID__="urn:nbn:de:gwdg:test" # must be generated by PID service
__MASTER_CREATION_DATE__="${now}"
__METADATA_CREATION_DATE__="${now}"
__METADATA_RECORD_CREATOR__="GWDG"
__NR_OF_FILES__=1
__FORMAT__="urn:diasid:fty:kopal:0200507050000000000064"
__FILE_CREATED__="${now}"
__SIZE__="$(stat --printf='%s' -- "${source_file}")"
__CHECKSUM__="$(sha1sum -- "${source_file}" | awk '{ print $1 }')"
# With exactly one file, the XOR over all file checksums is that checksum.
__XOR_OF_FILE_CHECKS__="${__CHECKSUM__}"
__CHECKSUMTYPE__="SHA-1"
__MIMETYPE__="application/pdf"

# Run FITS before anything is written: a failure aborts the script here
# (a command substitution inside the here-doc would fail silently).
# An XML declaration is only allowed at the very start of a document, so
# drop it from the FITS output, if present, before embedding it in mets.xml.
fits_xml="$("${FITS_TOOL_PATH}" -i "${source_file}" \
  | sed -e '1s/^<?xml[^>]*?>//')"

UUID="$(uuidgen)"
mkdir -p -- "${UUID}/content"
cp -v -- "${source_file}" "${UUID}/content"
cd -- "${UUID}" || die "cannot change into ${UUID}"

cat << EOF > mets.xml
<?xml version="1.0" encoding="UTF-8"?>
<mets OBJID="" TYPE="kopal Submission Information Package" PROFILE="DNB" xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd http://www.ddb.de/LMERfile http://files.dnb.de/standards/lmer/lmer-file.xsd http://www.ddb.de/LMERobject http://files.dnb.de/standards/lmer/lmer-object.xsd http://www.ddb.de/LMERprocess http://files.dnb.de/standards/lmer/lmer-process.xsd http://purl.org/dc/elements/1.1/ http://dublincore.org/schemas/xmls/qdc/2003/04/02/dc.xsd" xmlns="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:lmerFile="http://www.ddb.de/LMERfile" xmlns:lmerObject="http://www.ddb.de/LMERobject" xmlns:lmerProcess="http://www.ddb.de/LMERprocess" xmlns:dc="http://purl.org/dc/elements/1.1/">
<metsHdr CREATEDATE="${__CREATE_DATE__}" RECORDSTATUS="TEST">
<agent ROLE="ARCHIVIST" TYPE="ORGANIZATION">
<name>${__AGENT_NAME__}</name>
<note>${__AGENT_NOTE__}</note>
</agent>
</metsHdr>
<amdSec ID="AmdSec-0001">
<techMD ID="TechMD-LMER-Object">
<mdWrap ID="TechMD-LMER-Object-MdWrap" MIMETYPE="text/xml" MDTYPE="OTHER" OTHERMDTYPE="lmerObject" LABEL="LMERobject">
<xmlData>
<lmerObject:persistentIdentifier>${__PID__}</lmerObject:persistentIdentifier>
<lmerObject:transferChecksum CHECKSUMTYPE="xor of sha1 file checksums">${__XOR_OF_FILE_CHECKS__}</lmerObject:transferChecksum>
<lmerObject:masterCreationDate>${__MASTER_CREATION_DATE__}</lmerObject:masterCreationDate>
<lmerObject:metadataCreationDate>${__METADATA_CREATION_DATE__}</lmerObject:metadataCreationDate>
<lmerObject:metadataRecordCreator>${__METADATA_RECORD_CREATOR__}</lmerObject:metadataRecordCreator>
<lmerObject:numberOfFiles>${__NR_OF_FILES__}</lmerObject:numberOfFiles>
</xmlData>
</mdWrap>
</techMD>
<techMD ID="TechMD-File--0">
<mdWrap ID="TechMD-File--0-MDWRAP" MIMETYPE="text/xml" MDTYPE="OTHER" OTHERMDTYPE="lmerFile">
<xmlData>
<lmerFile:format REGISTRYNAME="DIAS">${__FORMAT__}</lmerFile:format>
<lmerFile:xmlData MDTYPE="FITS">
${fits_xml}
</lmerFile:xmlData>
</xmlData>
</mdWrap>
</techMD>
</amdSec>
<fileSec>
<fileGrp ID="ASSET" ADMID="TechMD-LMER-Object">
<file ID="FILE-0" ADMID="TechMD-File--0" CREATED="${__FILE_CREATED__}" SIZE="${__SIZE__}" CHECKSUM="${__CHECKSUM__}" CHECKSUMTYPE="${__CHECKSUMTYPE__}" MIMETYPE="${__MIMETYPE__}">
<FLocat LOCTYPE="URL" xlink:type="simple" xlink:href="file://./content/${__FILENAME__}"/>
</file>
</fileGrp>
</fileSec>
<structMap TYPE="ASSET">
<div ORDER="1" LABEL="File list" TYPE="ASSET">
<fptr FILEID="FILE-0"/>
</div>
</structMap>
</mets>
EOF

# Name the archive members explicitly instead of relying on a '*' glob.
zip -r "sip-${UUID}.zip" content mets.xml
echo done
Run script¶
root@host:~/getting-started# ./mets-template.sh
'littledorrit0000dick_o8l6.pdf' -> '7f55da15-6551-4e6a-ac3a-13f6df1328ff/content/littledorrit0000dick_o8l6.pdf'
adding: content/ (stored 0%)
adding: content/littledorrit0000dick_o8l6.pdf (deflated 4%)
adding: mets.xml (deflated 62%)
done
root@host:~/getting-started# find 7f55da15-6551-4e6a-ac3a-13f6df1328ff/
7f55da15-6551-4e6a-ac3a-13f6df1328ff/
7f55da15-6551-4e6a-ac3a-13f6df1328ff/mets.xml
7f55da15-6551-4e6a-ac3a-13f6df1328ff/content
7f55da15-6551-4e6a-ac3a-13f6df1328ff/content/littledorrit0000dick_o8l6.pdf
7f55da15-6551-4e6a-ac3a-13f6df1328ff/sip-7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip
Upload¶
Request transport parameters from: API
root@host:~/getting-started# export SSHPASS=your-password-here
# Upload the SIP under a temporary name and rename it once the transfer has
# finished. The local archive is the sip-<UUID>.zip file inside the <UUID>
# directory created by mets-template.sh; the final remote name (<UUID>.zip)
# is the ticket id used when querying the ingest status.
# sftp has no "mv" command; "rename" is the equivalent.
sshpass -e sftp -oBatchMode=no -b - ftpuser@koala.gwdg.de << !
cd /data/prddias/preload/SIP/DDBDIASMETS10
put 7f55da15-6551-4e6a-ac3a-13f6df1328ff/sip-7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip 7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip.tmp
rename 7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip.tmp 7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip
bye
!
Check ingest progress via ticket system¶
State: done
indicates a successfully ingested SIP.
root@host:~/getting-started# curl https://koala.gwdg.de/IngestStatus/Ingest?TicketId=7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip
<?xml version="1.0" encoding="UTF-8"?>
<MetadataBlock type="busy" version="1.0">
<Description>Ingest Status</Description>
<Metadata>
<StartTimestamp>2020-06-20 08:51:09</StartTimestamp>
<SourceID>loader@8a9b9b1cb712</SourceID>
<State>done</State>
<Substate>done</Substate>
<SupplementaryInformation>
</SupplementaryInformation>
<Timestamp>2020-06-20 06:51:11</Timestamp>
</Metadata>
</MetadataBlock>