Getting started¶
How to use koala from a client perspective.
Regular SIP workflow¶
Steps:
- Set up the metadata extraction tool.
- Extract metadata from file.
- Pack metadata file (mets.xml) and content into SIP archive file.
- Upload SIP via SFTP to koala.
- Check ingest progress via the koala ticket system.
Setup metadata extraction toolkit: FITS¶
To ensure long-term archivability, metadata must be extracted from each file before it is packaged.
root@host:~/getting-started# wget https://github.com/harvard-lts/fits/releases/download/1.5.0/fits-1.5.0.zip
root@host:~/getting-started# unzip fits-1.5.0.zip
Sample script to extract metadata from PDF and create SIP file¶
root@host:~/getting-started# cat mets-template.sh
#!/bin/bash
#
# Build a kopal/DNB-profile SIP (Submission Information Package) for one file.
#
# The script creates a fresh directory named after a generated UUID, copies the
# file into <UUID>/content/, writes <UUID>/mets.xml (METS header, LMER object
# and file metadata, embedded FITS output) and finally zips both into
# <UUID>/sip-<UUID>.zip.
#
# Usage:
#   ./mets-template.sh [FILE]
#
# Arguments:
#   FILE             file to archive (default: littledorrit0000dick_o8l6.pdf)
#
# Environment:
#   FITS_TOOL_PATH   path to the FITS launcher
#                    (default: /root/getting-started/fits.sh)
#
# Requires GNU date/stat, sha1sum, awk, uuidgen and zip.
set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

FITS_TOOL_PATH="${FITS_TOOL_PATH:-/root/getting-started/fits.sh}"
SOURCE_FILE="${1:-littledorrit0000dick_o8l6.pdf}" # the file I want to archive

[[ -f "${SOURCE_FILE}" ]] || die "input file not found: ${SOURCE_FILE}"
[[ -x "${FITS_TOOL_PATH}" ]] || die "FITS launcher not executable: ${FITS_TOOL_PATH}"
# The script changes directory later on, so a relative tool path must be
# anchored to the directory the script was started from.
[[ "${FITS_TOOL_PATH}" == /* ]] || FITS_TOOL_PATH="${PWD}/${FITS_TOOL_PATH}"

# One timestamp for all date fields keeps the generated record consistent.
NOW="$(date --iso-8601=seconds)"

# Name of the file inside the SIP (directory part of FILE is dropped).
__FILENAME__="${SOURCE_FILE##*/}"
__CREATE_DATE__="${NOW}"
__AGENT_NAME__="GWDG"
__AGENT_NOTE__="Automatisch generierte Metadaten"
__PID__="urn:nbn:de:gwdg:test" # must be generated by PID service
__MASTER_CREATION_DATE__="${NOW}"
__METADATA_CREATION_DATE__="${NOW}"
__METADATA_RECORD_CREATOR__="GWDG"
__NR_OF_FILES__=1
__FORMAT__="urn:diasid:fty:kopal:0200507050000000000064"
__FILE_CREATED__="${NOW}"
__SIZE__="$(stat --printf='%s' -- "${SOURCE_FILE}")"
# Reading from stdin keeps the file name out of sha1sum's output.
__CHECKSUM__="$(sha1sum < "${SOURCE_FILE}" | awk '{ print $1 }')"
# With a single file the XOR over all file checksums is that file's checksum.
__XOR_OF_FILE_CHECKS__="${__CHECKSUM__}"
__CHECKSUMTYPE__="SHA-1"
__MIMETYPE__="application/pdf"

UUID="$(uuidgen)"
mkdir -p -- "${UUID}/content"
cp -v -- "${SOURCE_FILE}" "${UUID}/content"
# Work inside the package directory so that archive paths are relative to it.
cd -- "${UUID}" || die "cannot change into ${UUID}"

# Run FITS up front (not inside the here-document) so a failure aborts the
# script instead of silently producing a METS file without technical metadata.
FITS_XML="$("${FITS_TOOL_PATH}" -i "content/${__FILENAME__}")" \
  || die "FITS metadata extraction failed for content/${__FILENAME__}"
# NOTE(review): FITS is assumed to print a standalone XML document. An XML
# declaration is only legal at the very start of a document, so a leading one
# is removed before the output is embedded; other output is left untouched.
if [[ "${FITS_XML}" == '<?xml'* ]]; then
  FITS_XML="${FITS_XML#*\?>}"
fi

cat << EOF > mets.xml
<?xml version="1.0" encoding="UTF-8"?>
<mets OBJID="" TYPE="kopal Submission Information Package" PROFILE="DNB" xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd    http://www.ddb.de/LMERfile http://files.dnb.de/standards/lmer/lmer-file.xsd    http://www.ddb.de/LMERobject http://files.dnb.de/standards/lmer/lmer-object.xsd    http://www.ddb.de/LMERprocess http://files.dnb.de/standards/lmer/lmer-process.xsd    http://purl.org/dc/elements/1.1/ http://dublincore.org/schemas/xmls/qdc/2003/04/02/dc.xsd" xmlns="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:lmerFile="http://www.ddb.de/LMERfile" xmlns:lmerObject="http://www.ddb.de/LMERobject" xmlns:lmerProcess="http://www.ddb.de/LMERprocess" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <metsHdr CREATEDATE="${__CREATE_DATE__}" RECORDSTATUS="TEST">
    <agent ROLE="ARCHIVIST" TYPE="ORGANIZATION">
      <name>${__AGENT_NAME__}</name>
      <note>${__AGENT_NOTE__}</note>
    </agent>
  </metsHdr>
  <amdSec ID="AmdSec-0001">
    <techMD ID="TechMD-LMER-Object">
      <mdWrap ID="TechMD-LMER-Object-MdWrap" MIMETYPE="text/xml" MDTYPE="OTHER" OTHERMDTYPE="lmerObject" LABEL="LMERobject">
        <xmlData>
          <lmerObject:persistentIdentifier>${__PID__}</lmerObject:persistentIdentifier>
          <lmerObject:transferChecksum CHECKSUMTYPE="xor of sha1 file checksums">${__XOR_OF_FILE_CHECKS__}</lmerObject:transferChecksum>
          <lmerObject:masterCreationDate>${__MASTER_CREATION_DATE__}</lmerObject:masterCreationDate>
          <lmerObject:metadataCreationDate>${__METADATA_CREATION_DATE__}</lmerObject:metadataCreationDate>
          <lmerObject:metadataRecordCreator>${__METADATA_RECORD_CREATOR__}</lmerObject:metadataRecordCreator>
          <lmerObject:numberOfFiles>${__NR_OF_FILES__}</lmerObject:numberOfFiles>
        </xmlData>
      </mdWrap>
    </techMD>
    <techMD ID="TechMD-File--0">
      <mdWrap ID="TechMD-File--0-MDWRAP" MIMETYPE="text/xml" MDTYPE="OTHER" OTHERMDTYPE="lmerFile">
        <xmlData>
          <lmerFile:format REGISTRYNAME="DIAS">${__FORMAT__}</lmerFile:format>
          <lmerFile:xmlData MDTYPE="FITS">
            ${FITS_XML}
          </lmerFile:xmlData>
        </xmlData>
      </mdWrap>
    </techMD>
  </amdSec>
  <fileSec>
    <fileGrp ID="ASSET" ADMID="TechMD-LMER-Object">
      <file ID="FILE-0" ADMID="TechMD-File--0" CREATED="${__FILE_CREATED__}" SIZE="${__SIZE__}" CHECKSUM="${__CHECKSUM__}" CHECKSUMTYPE="${__CHECKSUMTYPE__}" MIMETYPE="${__MIMETYPE__}">
        <FLocat LOCTYPE="URL" xlink:type="simple" xlink:href="file://./content/${__FILENAME__}"/>
      </file>
    </fileGrp>
  </fileSec>
  <structMap TYPE="ASSET">
    <div ORDER="1" LABEL="File list" TYPE="ASSET">
      <fptr FILEID="FILE-0"/>
    </div>
  </structMap>
</mets>
EOF

# Name the payload explicitly instead of relying on a glob.
zip -r "sip-${UUID}.zip" content mets.xml
echo done
Run script¶
root@host:~/getting-started# ./mets-template.sh
'littledorrit0000dick_o8l6.pdf' -> '7f55da15-6551-4e6a-ac3a-13f6df1328ff/content/littledorrit0000dick_o8l6.pdf'
  adding: content/ (stored 0%)
  adding: content/littledorrit0000dick_o8l6.pdf (deflated 4%)
  adding: mets.xml (deflated 62%)
done
root@host:~/getting-started# find 7f55da15-6551-4e6a-ac3a-13f6df1328ff/
7f55da15-6551-4e6a-ac3a-13f6df1328ff/
7f55da15-6551-4e6a-ac3a-13f6df1328ff/mets.xml
7f55da15-6551-4e6a-ac3a-13f6df1328ff/content
7f55da15-6551-4e6a-ac3a-13f6df1328ff/content/littledorrit0000dick_o8l6.pdf
7f55da15-6551-4e6a-ac3a-13f6df1328ff/sip-7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip
Upload¶
Request transport parameters from: API
root@host:~/getting-started# export SSHPASS=your-password-here
  sshpass -e sftp -oBatchMode=no -b - ftpuser@koala.gwdg.de << !
   cd /data/prddias/preload/SIP/DDBDIASMETS10
   put 7f55da15-6551-4e6a-ac3a-13f6df1328ff/sip-7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip 7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip.tmp
   mv 7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip.tmp 7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip
   bye
!
Check ingest progress via ticket system¶
A <State> value of "done" in the response indicates that the SIP was ingested successfully.
root@host:~/getting-started# curl https://koala.gwdg.de/IngestStatus/Ingest?TicketId=7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip
<?xml version="1.0" encoding="UTF-8"?>
<MetadataBlock type="busy" version="1.0">
    <Description>Ingest Status</Description>
    <Metadata>
        <StartTimestamp>2020-06-20 08:51:09</StartTimestamp>
        <SourceID>loader@8a9b9b1cb712</SourceID>
        <State>done</State>
        <Substate>done</Substate>
        <SupplementaryInformation>
        </SupplementaryInformation>
        <Timestamp>2020-06-20 06:51:11</Timestamp>
    </Metadata>
</MetadataBlock>