Skip to content

Getting started

How to use koala from a client perspective.

Regular SIP workflow

Steps:

  1. Set up the metadata extraction tool.
  2. Extract metadata from file.
  3. Pack metadata file (mets.xml) and content into SIP archive file.
  4. Upload SIP via SFTP to koala.
  5. Check ingest process via koala ticket system.

Set up the metadata extraction toolkit: FITS

To ensure long-term archivability, metadata must be extracted from each file before it is packed into a SIP.

root@host:~/getting-started# wget https://github.com/harvard-lts/fits/releases/download/1.5.0/fits-1.5.0.zip
root@host:~/getting-started# unzip fits-1.5.0.zip

Sample script to extract metadata from PDF and create SIP file

root@host:~/getting-started# cat mets-template.sh

#!/bin/bash
#
# mets-template.sh - sample script that builds a koala SIP for a single PDF.
#
# Steps:
#   1. copy the file into a fresh <UUID>/content/ directory,
#   2. extract technical metadata from the copy with FITS,
#   3. write <UUID>/mets.xml from the template below,
#   4. pack content/ and mets.xml into <UUID>/sip-<UUID>.zip.
#
# Requires: GNU coreutils (date, stat, sha1sum), awk, sed, uuidgen, zip, FITS.
# Environment: FITS_TOOL_PATH may override the location of fits.sh.

# Abort on the first failing command, unset variable or broken pipeline so
# that an incomplete SIP is never packed.
set -euo pipefail

FITS_TOOL_PATH="${FITS_TOOL_PATH:-/root/getting-started/fits.sh}"

__FILENAME__="littledorrit0000dick_o8l6.pdf" # the file I want to archive

# Fail early, before any directory is created.
if [[ ! -f "${__FILENAME__}" ]]; then
  printf 'error: file not found: %s\n' "${__FILENAME__}" >&2
  exit 1
fi
if [[ ! -x "${FITS_TOOL_PATH}" ]]; then
  printf 'error: FITS launcher not executable: %s\n' "${FITS_TOOL_PATH}" >&2
  exit 1
fi

# One timestamp for every date field keeps the record self-consistent.
NOW="$(date --iso-8601=seconds)"

__CREATE_DATE__="${NOW}"
__AGENT_NAME__="GWDG"
__AGENT_NOTE__="Automatisch generierte Metadaten"
__PID__="urn:nbn:de:gwdg:test" # must be generated by PID service
__MASTER_CREATION_DATE__="${NOW}"
__METADATA_CREATION_DATE__="${NOW}"
__METADATA_RECORD_CREATOR__="GWDG"
__NR_OF_FILES__=1
__FORMAT__="urn:diasid:fty:kopal:0200507050000000000064"
__FILE_CREATED__="${NOW}"
__SIZE__="$(stat --printf='%s' -- "${__FILENAME__}")"
__CHECKSUM__="$(sha1sum -- "${__FILENAME__}" | awk '{ print $1 }')"
# With exactly one file, the XOR over all file checksums is that checksum.
__XOR_OF_FILE_CHECKS__="${__CHECKSUM__}"
__CHECKSUMTYPE__="SHA-1"
__MIMETYPE__="application/pdf"

UUID="$(uuidgen)"

mkdir -p -- "${UUID}/content"
cp -v -- "${__FILENAME__}" "${UUID}/content"
cd -- "${UUID}"

# Run FITS before writing mets.xml so that a FITS failure aborts the script
# instead of silently producing an empty metadata section. The absolute path
# keeps the call independent of FITS' own working directory handling.
# NOTE(review): FITS typically prints its own XML declaration first; it is
# stripped if present because a declaration may only open a document.
__FITS_XML__="$("${FITS_TOOL_PATH}" -i "${PWD}/content/${__FILENAME__}" \
  | sed -e '1s/^<?xml[^>]*?>//')"

# The XML declaration must be the very first bytes of mets.xml, so the
# here-document starts directly with it (no leading blank line).
cat << EOF > mets.xml
<?xml version="1.0" encoding="UTF-8"?>
<mets OBJID="" TYPE="kopal Submission Information Package" PROFILE="DNB" xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd    http://www.ddb.de/LMERfile http://files.dnb.de/standards/lmer/lmer-file.xsd    http://www.ddb.de/LMERobject http://files.dnb.de/standards/lmer/lmer-object.xsd    http://www.ddb.de/LMERprocess http://files.dnb.de/standards/lmer/lmer-process.xsd    http://purl.org/dc/elements/1.1/ http://dublincore.org/schemas/xmls/qdc/2003/04/02/dc.xsd" xmlns="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:lmerFile="http://www.ddb.de/LMERfile" xmlns:lmerObject="http://www.ddb.de/LMERobject" xmlns:lmerProcess="http://www.ddb.de/LMERprocess" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <metsHdr CREATEDATE="${__CREATE_DATE__}" RECORDSTATUS="TEST">
    <agent ROLE="ARCHIVIST" TYPE="ORGANIZATION">
      <name>${__AGENT_NAME__}</name>
      <note>${__AGENT_NOTE__}</note>
    </agent>
  </metsHdr>
  <amdSec ID="AmdSec-0001">
    <techMD ID="TechMD-LMER-Object">
      <mdWrap ID="TechMD-LMER-Object-MdWrap" MIMETYPE="text/xml" MDTYPE="OTHER" OTHERMDTYPE="lmerObject" LABEL="LMERobject">
        <xmlData>
          <lmerObject:persistentIdentifier>${__PID__}</lmerObject:persistentIdentifier>
          <lmerObject:transferChecksum CHECKSUMTYPE="xor of sha1 file checksums">${__XOR_OF_FILE_CHECKS__}</lmerObject:transferChecksum>
          <lmerObject:masterCreationDate>${__MASTER_CREATION_DATE__}</lmerObject:masterCreationDate>
          <lmerObject:metadataCreationDate>${__METADATA_CREATION_DATE__}</lmerObject:metadataCreationDate>
          <lmerObject:metadataRecordCreator>${__METADATA_RECORD_CREATOR__}</lmerObject:metadataRecordCreator>
          <lmerObject:numberOfFiles>${__NR_OF_FILES__}</lmerObject:numberOfFiles>
        </xmlData>
      </mdWrap>
    </techMD>
    <techMD ID="TechMD-File--0">
      <mdWrap ID="TechMD-File--0-MDWRAP" MIMETYPE="text/xml" MDTYPE="OTHER" OTHERMDTYPE="lmerFile">
        <xmlData>
          <lmerFile:format REGISTRYNAME="DIAS">${__FORMAT__}</lmerFile:format>
          <lmerFile:xmlData MDTYPE="FITS">

            ${__FITS_XML__}

          </lmerFile:xmlData>
        </xmlData>
      </mdWrap>
    </techMD>
  </amdSec>
  <fileSec>
    <fileGrp ID="ASSET" ADMID="TechMD-LMER-Object">
      <file ID="FILE-0" ADMID="TechMD-File--0" CREATED="${__FILE_CREATED__}" SIZE="${__SIZE__}" CHECKSUM="${__CHECKSUM__}" CHECKSUMTYPE="${__CHECKSUMTYPE__}" MIMETYPE="${__MIMETYPE__}">
        <FLocat LOCTYPE="URL" xlink:type="simple" xlink:href="file://./content/${__FILENAME__}"/>
      </file>
    </fileGrp>
  </fileSec>
  <structMap TYPE="ASSET">
    <div ORDER="1" LABEL="File list" TYPE="ASSET">
      <fptr FILEID="FILE-0"/>
    </div>
  </structMap>
</mets>

EOF

# Name the archive members explicitly instead of relying on a glob.
zip -r "sip-${UUID}.zip" content mets.xml
echo done

Run script

root@host:~/getting-started# ./mets-template.sh
'littledorrit0000dick_o8l6.pdf' -> '7f55da15-6551-4e6a-ac3a-13f6df1328ff/content/littledorrit0000dick_o8l6.pdf'
  adding: content/ (stored 0%)
  adding: content/littledorrit0000dick_o8l6.pdf (deflated 4%)
  adding: mets.xml (deflated 62%)
done

root@host:~/getting-started# find 7f55da15-6551-4e6a-ac3a-13f6df1328ff/
7f55da15-6551-4e6a-ac3a-13f6df1328ff/
7f55da15-6551-4e6a-ac3a-13f6df1328ff/mets.xml
7f55da15-6551-4e6a-ac3a-13f6df1328ff/content
7f55da15-6551-4e6a-ac3a-13f6df1328ff/content/littledorrit0000dick_o8l6.pdf
7f55da15-6551-4e6a-ac3a-13f6df1328ff/sip-7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip

Upload

Request transport parameters from: API

root@host:~/getting-started# export SSHPASS=your-password-here
  # Upload the SIP created by mets-template.sh under a temporary *.tmp name
  # and rename it once the transfer is complete (presumably so that koala
  # never picks up a partially uploaded file - confirm with the service docs).
  # "put <local> <remote>" maps the local sip-<UUID>.zip to the remote name;
  # sftp renames remote files with "rename" (it has no "mv" command).
  sshpass -e sftp -oBatchMode=no -b - ftpuser@koala.gwdg.de << !
   cd /data/prddias/preload/SIP/DDBDIASMETS10
   put 7f55da15-6551-4e6a-ac3a-13f6df1328ff/sip-7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip 7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip.tmp
   rename 7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip.tmp 7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip
   bye
!

Check ingest progress via ticket system

A `State` of `done` in the response indicates that the SIP was ingested successfully.

# Quote the URL so the shell never treats the '?' as a glob character.
root@host:~/getting-started# curl 'https://koala.gwdg.de/IngestStatus/Ingest?TicketId=7f55da15-6551-4e6a-ac3a-13f6df1328ff.zip'
<?xml version="1.0" encoding="UTF-8"?>
<MetadataBlock type="busy" version="1.0">
    <Description>Ingest Status</Description>
    <Metadata>
        <StartTimestamp>2020-06-20 08:51:09</StartTimestamp>
        <SourceID>loader@8a9b9b1cb712</SourceID>
        <State>done</State>
        <Substate>done</Substate>
        <SupplementaryInformation>
        </SupplementaryInformation>
        <Timestamp>2020-06-20 06:51:11</Timestamp>
    </Metadata>
</MetadataBlock>