#!/bin/bash

# ===========================================================================
#
#                            PUBLIC DOMAIN NOTICE
#            National Center for Biotechnology Information (NCBI)
#
#  This software/database is a "United States Government Work" under the
#  terms of the United States Copyright Act.  It was written as part of
#  the author's official duties as a United States Government employee and
#  thus cannot be copyrighted.  This software/database is freely available
#  to the public for use. The National Library of Medicine and the U.S.
#  Government do not place any restriction on its use or reproduction.
#  We would, however, appreciate having the NCBI and the author cited in
#  any work or product based on this material.
#
#  Although all reasonable efforts have been taken to ensure the accuracy
#  and reliability of the software and data, the NLM and the U.S.
#  Government do not and cannot warrant the performance or results that
#  may be obtained by using this software or data. The NLM and the U.S.
#  Government disclaim all warranties, express or implied, including
#  warranties of performance, merchantability or fitness for any particular
#  purpose.
#
# ===========================================================================
#
# File Name:  efetch
#
# Author:  Jonathan Kans, Aaron Ucko
#
# Version Creation Date:   04/08/2020
#
# ==========================================================================

# directory that holds this script, made absolute if it was given relatively

pth=$( dirname "$0" )

if [ "${pth#/}" = "$pth" ]
then
  # no leading slash - resolve against the current working directory
  pth=$(cd "$pth" && pwd)
fi

# append that directory to PATH unless it is already one of its entries

case ":$PATH:" in
  *:"$pth":* ) ;;
  * ) export PATH="$PATH:$pth" ;;
esac

# common flag handling lives in ecommon.sh, which must sit next to this
# script - the dot command is the equivalent of "source"

if [ -f "$pth"/ecommon.sh ]
then
  . "$pth"/ecommon.sh
else
  echo "ERROR: Unable to find '$pth/ecommon.sh' file" >&2
  exit 1
fi

# defaults for the efetch-specific flags, overridden by the argument loop

# simple on/off switches
internal=false isDocsum=false json=false

# values of -format, -mode and -style
format="" mode="" style=""

# request size, -chunk override, and the -start / -stop limits
chunk=1 force=0 min=0 max=0

# sequence-only options - 0, -1 or empty means the option was not given
seq_start=0 seq_stop=0
strand=0 complexity=0
extend=-1 extrafeat=-1
showgaps=""

# switched on later when ASN.1 is requested from a sequence database
isTextASN=false isBinaryASN=false

# read command-line arguments

# Every arm removes its own flag (and value, if it takes one) with "shift".
# Flags not listed here are offered to ParseCommonArgs, which reports how
# many arguments it used in $argsConsumed; zero means the flag is unknown.

while [ $# -gt 0 ]
do
  tag="$1"
  rem="$#"
  case "$tag" in
    -internal )
      internal=true
      shift
      ;;
    -newmode | -oldmode )
      # accepted, but has no effect here
      shift
      ;;
    -db )
      shift
      if [ $# -gt 0 ]
      then
        if [ -n "$db" ]
        then
          if [ "$db" = "$1" ]
          then
            DisplayWarning "Redundant -db '$1' argument"
          else
            # reported but not fatal - the last -db wins
            DisplayError "Colliding -db '$db' and '$1' arguments"
            # exit 1
          fi
        fi
        db="$1"
        shift
      else
        DisplayError "Missing -db argument"
        exit 1
      fi
      ;;
    -id )
      CheckForArgumentValue "$tag" "$rem"
      shift
      ids="$1"
      shift
      while [ $# -gt 0 ]
      do
        case "$1" in
          -* )
            # next flag reached, end of the UID run
            break
            ;;
          * )
            # concatenate run of UIDs with commas
            ids="$ids,$1"
            shift
            ;;
        esac
      done
      ;;
    -format )
      shift
      if [ $# -gt 0 ]
      then
        if [ -n "$format" ]
        then
          if [ "$isDocsum" = true ] && [ "$1" = "docsum" ]
          then
            DisplayWarning "esummary does not need redundant -format docsum argument"
          elif [ "$format" = "$1" ]
          then
            DisplayWarning "Redundant -format '$1' argument"
          else
            # reported but not fatal - the last -format wins
            DisplayError "Colliding -format '$format' and '$1' arguments"
            # exit 1
          fi
        fi
        format="$1"
        shift
      else
        DisplayError "Missing -format argument"
        exit 1
      fi
      ;;
    -docsum )
      # esummary is implemented as efetch -docsum "$@"
      if [ -n "$format" ]
      then
        if [ "$format" = "docsum" ]
        then
          DisplayWarning "Superfluous -docsum argument"
        else
          DisplayError "Colliding -docsum and -format '$format' arguments"
          # exit 1
        fi
      fi
      format="docsum"
      isDocsum=true
      shift
      ;;
    -mode )
      CheckForArgumentValue "$tag" "$rem"
      shift
      mode="$1"
      shift
      ;;
    -style )
      CheckForArgumentValue "$tag" "$rem"
      shift
      style="$1"
      shift
      ;;
    -seq_start )
      CheckForArgumentValue "$tag" "$rem"
      shift
      # 1-based
      seq_start=$(( $1 ))
      shift
      ;;
    -chr_start )
      CheckForArgumentValue "$tag" "$rem"
      shift
      # 0-based
      seq_start=$(( $1 + 1 ))
      shift
      ;;
    -seq_stop )
      CheckForArgumentValue "$tag" "$rem"
      shift
      # 1-based
      seq_stop=$(( $1 ))
      shift
      ;;
    -chr_stop )
      CheckForArgumentValue "$tag" "$rem"
      shift
      # 0-based
      seq_stop=$(( $1 + 1 ))
      shift
      ;;
    -strand )
      shift
      if [ $# -gt 0 ]
      then
        # a bare "+" or "-" is accepted as well as the backslash-prefixed
        # spellings, which only match when the backslash reaches the script
        case "$1" in
          forward | plus | 1 | "+" | "\+" )
            strand=1
            ;;
          revcomp | reverse | minus | 2 | "-" | "\-" )
            strand=2
            ;;
          * )
            # report the rejected value itself, not the current strand setting
            DisplayError "Unrecognized -strand argument '$1'"
            exit 1
            ;;
        esac
        shift
      else
        DisplayError "Missing -strand argument"
        exit 1
      fi
      ;;
    -forward | -plus )
      strand=1
      shift
      ;;
    -revcomp | -reverse | -minus )
      strand=2
      shift
      ;;
    -h | -help | --help | help )
      newVersion=$( NewerEntrezDirectVersion )
      # the same script serves both commands, only name and help file differ
      if [ "$isDocsum" = true ]
      then
        helpName="esummary"
      else
        helpName="efetch"
      fi
      echo "$helpName $version"
      echo ""
      if [ -n "$newVersion" ]
      then
        # the upgrade note is shown both before and after the help text
        DisplayNote "EDirect version ${newVersion} is now available"
        echo "" >&2
        cat "$pth/help/${helpName}-help.txt"
        echo ""
        DisplayNote "EDirect version ${newVersion} is now available"
        echo "" >&2
      else
        cat "$pth/help/${helpName}-help.txt"
        echo ""
      fi
      exit 0
      ;;
    -chunk )
      CheckForArgumentValue "$tag" "$rem"
      shift
      # override calculated chunk value (undocumented)
      force=$(( $1 ))
      shift
      ;;
    -start )
      CheckForArgumentValue "$tag" "$rem"
      shift
      min=$(( $1 ))
      shift
      ;;
    -stop )
      CheckForArgumentValue "$tag" "$rem"
      shift
      max=$(( $1 ))
      shift
      ;;
    -complexity )
      CheckForArgumentValue "$tag" "$rem"
      shift
      complexity=$(( $1 ))
      shift
      ;;
    -extend )
      CheckForArgumentValue "$tag" "$rem"
      shift
      extend=$(( $1 ))
      shift
      ;;
    -extrafeat )
      CheckForArgumentValue "$tag" "$rem"
      shift
      extrafeat=$(( $1 ))
      shift
      ;;
    -showgaps | -show-gaps )
      showgaps="on"
      shift
      ;;
    -json )
      json=true
      shift
      ;;
    -* )
      ParseCommonArgs "$@"
      if [ "$argsConsumed" -gt 0 ]
      then
        shift "$argsConsumed"
      else
        DisplayError "Unrecognized option $1"
        exit 1
      fi
      ;;
    * )
      # stray non-flag word: reported, then skipped
      DisplayError "Unrecognized argument $1"
      shift
      ;;
  esac
done

FinishSetup

# check for ENTREZ_DIRECT message or piped UIDs, unless -db was given
# together with $ids or $input

if [ -z "$db" ] || { [ -z "$ids" ] && [ -z "$input" ]; }
then
  ParseStdin
fi

# with none of these set, the history checks further down are enabled

[ -n "$ids$rest$qury$input" ] || needHistory=true

# reality check for -db against piped dbase

if [ -n "$dbase" ] && [ -n "$db" ]
then
  case "$db" in
    "$dbase" )
      DisplayWarning "Redundant -db '$db' argument"
      ;;
    * )
      # reported but not fatal
      DisplayError "Colliding '$dbase' database and -db '$db' argument"
      # exit 1
      ;;
  esac
fi

# an existing dbase value wins, otherwise take the -db argument

[ -n "$dbase" ] || dbase="$db"

# nucleotide is mapped to nuccore

case "$dbase" in
  nucleotide ) dbase="nuccore" ;;
esac

# check for missing required arguments

if [ -z "$dbase" ]
then
  DisplayError "Missing -db argument"
  exit 1
fi

# normalize to lower-case (e.g., SRA -> sra)

dbase=$( echo "$dbase" | tr '[:upper:]' '[:lower:]' )

# fill each empty date variable from its "x" counterpart, when that is set

if [ -z "$reldate" ] && [ -n "$reldatex" ]; then reldate="$reldatex"; fi
if [ -z "$mindate" ] && [ -n "$mindatex" ]; then mindate="$mindatex"; fi
if [ -z "$maxdate" ] && [ -n "$maxdatex" ]; then maxdate="$maxdatex"; fi
if [ -z "$datetype" ] && [ -n "$datetypex" ]; then datetype="$datetypex"; fi

# normalize date arguments

FixDateConstraints

# turn spaces between UIDs into commas, then squeeze runs of commas

ids=$( echo "$ids" | sed -e 's/ /,/g' -e 's/,,*/,/g' )

# database and format class flags

isSequence=false isNucleotide=false isFasta=false

if [ "$dbase" = "nucleotide" ] || [ "$dbase" = "nuccore" ]
then
  isSequence=true
  isNucleotide=true
elif [ "$dbase" = "protein" ]
then
  isSequence=true
fi

# any format whose name contains "fasta" counts as FASTA:
# removing a prefix that ends in "fasta" changes the value only if one exists

if [ "${format#*fasta}" != "$format" ]
then
  isFasta=true
fi

# per-database adjustments of format and mode

pubmedAPA=false
pubmedASN=false
is_variationid=false

case "$dbase" in
  pubmed )
    # every special pubmed format is fetched as full XML, with a flag
    # recording which format was actually asked for
    case "$format" in
      "" | native )
        format="full"
        mode="xml"
        ;;
      apa )
        format="full"
        mode="xml"
        pubmedAPA=true
        ;;
      asn | asn.1 | asn1 )
        format="full"
        mode="xml"
        pubmedASN=true
        ;;
    esac
    ;;
  clinvar )
    # variationid is requested as vcv, with a flag set alongside
    if [ "$format" = "variationid" ]
    then
      format="vcv"
      is_variationid=true
    fi
    ;;
  sra )
    case "$format" in
      "" )
        format="runinfo"
        ;;
      full )
        mode="xml"
        ;;
      native )
        format="full"
        mode="xml"
        ;;
    esac
    ;;
esac

# adjust for sequence databases - asn.1 to asn temporarily for special case matching
# (a mode of "asn" is turned back into "asn.1" further down in the script)

if [ "$isSequence" = true ]
then
  if [ "$format" = "asn.1" ] || [ "$format" = "asn1" ]
  then
    format="asn"
  fi
  if [ "$mode" = "asn.1" ] || [ "$mode" = "asn1" ]
  then
    mode="asn"
  fi
fi

# special cases for format, mode, and style

# The selector is "format:mode:isSequence". Arms are tried top to bottom and
# only the first match runs, so the order of the three asn arms matters.
# Note that "*" in a pattern also matches ":" characters.

case "$format:$mode:$isSequence" in
  xml::* )
    # -format xml with no -mode
    format="full"
    mode="xml"
    ;;
  accn:*:true )
    format="acc"
    ;;
  asn:binary:true )
    # binary ASN.1 from a sequence database
    format="native"
    mode="asn.1"
    immediate=true
    isBinaryASN=true
    ;;
  asn:*:true )
    # any other asn request on a sequence database is fetched as text
    format="native"
    mode="text"
    immediate=true
    isTextASN=true
    ;;
  asn:* )
    # reached only when isSequence is not true, as the arm above takes the rest
    format="asn.1"
    immediate=true
    ;;
  gbf:*:true )
    format="gb"
    ;;
  gpf:*:true )
    format="gp"
    ;;
esac

# normalize -style: synonyms collapse onto one spelling, unknown values abort

case "$style" in
  normal | none | contig )
    style=""
    ;;
  "" | master )
    # already in final form
    ;;
  conwithfeat | conwithfeats | contigwithfeat | gbconwithfeat | gbconwithfeats )
    style="conwithfeat"
    ;;
  withpart | withparts | gbwithpart | gbwithparts )
    # accept from old scripts - same result as style master
    style="withparts"
    ;;
  * )
    DisplayError "Unrecognized -style argument '$style'"
    exit 1
    ;;
esac

# formats that imply a mode. The selector always contains ":", so there is
# no arm for an empty selector; empty values are defaulted just below.

case "$format:$mode" in
  gbc: | gpc: )
    # these formats default to xml when no -mode is given
    mode="xml"
    ;;
  docsum:json )
    # json docsum is left alone
    ;;
  docsum:* )
    # every other docsum is fetched as xml
    mode="xml"
    ;;
esac

# final defaults

if [ -z "$format" ]
then
  format="native"
fi

if [ -z "$mode" ]
then
  mode="text"
fi

# restore the asn.1 spelling for the mode

if [ "$mode" = "asn" ]
then
  mode="asn.1"
fi

# do not treat TinySeq XML as FASTA

if [ "$format" = "fasta" ] && [ "$mode" = "xml" ]
then
  isFasta=false
fi

# input reality checks

# Only applies when needHistory was set. Every branch of the ladder ends
# the script, so the first condition that holds decides the outcome.

if [ "$needHistory" = true ]
then
  if [ -t 0 ]
  then
    DisplayError "ENTREZ_DIRECT message not piped from stdin"
    exit 1
  elif [ "$empty" = true ]
  then
    # silently exit if explicit count of "0"
    exit 0
  elif [ -z "$web_env" ]
  then
    # no results piped in, silently exit
    [ -n "$rest" ] || exit 0
    DisplayError "WebEnv value not found in efetch input"
    exit 1
  elif [ -z "$qry_key" ]
  then
    DisplayError "QueryKey value not found in efetch input"
    exit 1
  elif [ "$num" -lt 1 ]
  then
    # silently exit if no results to fetch
    exit 0
  fi
fi

# -id 0 looks up default record for each database

# Reads $dbase and, when a default UID is listed for it, stores that UID
# in $ids. For any other database $ids is left as it was.

GetZero() {

  # the function's own positional parameters serve as scratch space,
  # so no additional global variable is created
  set --

  case "$dbase" in
    annotinfo            ) set -- "122134" ;;
    assembly             ) set -- "443538" ;;
    biocollections       ) set -- "7370" ;;
    bioproject           ) set -- "146229" ;;
    biosample            ) set -- "3737421" ;;
    biosystems           ) set -- "1223165" ;;
    blastdbinfo          ) set -- "998664" ;;
    books                ) set -- "1371014" ;;
    cdd                  ) set -- "274590" ;;
    clinvar              ) set -- "10510" ;;
    clone                ) set -- "18646800" ;;
    dbvar                ) set -- "6173073" ;;
    gap                  ) set -- "872875" ;;
    gapplus              ) set -- "136686" ;;
    gds                  ) set -- "200022309" ;;
    gencoll              ) set -- "398148" ;;
    gene                 ) set -- "3667" ;;
    genome               ) set -- "52" ;;
    geoprofiles          ) set -- "16029743" ;;
    grasp                ) set -- "2852486" ;;
    gtr                  ) set -- "559277" ;;
    homologene           ) set -- "510" ;;
    ipg                  ) set -- "422234" ;;
    medgen               ) set -- "162753" ;;
    mesh                 ) set -- "68007328" ;;
    ncbisearch           ) set -- "3158" ;;
    nlmcatalog           ) set -- "0404511" ;;
    nuccore | nucleotide ) set -- "1322283" ;;
    omim                 ) set -- "176730" ;;
    orgtrack             ) set -- "319950" ;;
    pcassay              ) set -- "1901" ;;
    pccompound           ) set -- "16132302" ;;
    pcsubstance          ) set -- "126522451" ;;
    pmc                  ) set -- "209839" ;;
    protein              ) set -- "4557671" ;;
    proteinclusters      ) set -- "2945638" ;;
    pubmed               ) set -- "2539356" ;;
    seqannot             ) set -- "9561" ;;
    snp                  ) set -- "137853337" ;;
    sra                  ) set -- "190091" ;;
    structure            ) set -- "61024" ;;
    taxonomy             ) set -- "562" ;;
    unigene              ) set -- "1132160" ;;
  esac

  if [ $# -gt 0 ]
  then
    ids="$1"
  fi
}

# a lone "0" as the UID asks for the database's default record

if [ "$ids" = "0" ]; then GetZero; fi

# lookup accessions in -id argument or piped from stdin,
# skipped only for clinvar records requested in vcv format

if ! { [ "$dbase" = "clinvar" ] && [ "$format" = "vcv" ]; }
then
  LookupSpecialAccessions
fi

# reality checks and adjustments on sequence variables

if [ "$isSequence" = true ]
then
  # -extend widens the requested range on both sides
  if [ "$extend" -gt 0 ]
  then
    seq_start=$(( seq_start - extend ))
    seq_stop=$(( seq_stop + extend ))
  fi

  if [ "$seq_start" -gt 0 ] && [ "$seq_stop" -gt 0 ]
  then
    if [ "$seq_start" -gt "$seq_stop" ]
    then
      # reversed coordinates: put them in order, and use strand 2
      # unless a strand was chosen explicitly
      tmps="$seq_start"
      seq_start="$seq_stop"
      seq_stop="$tmps"
      if [ "$strand" -eq 0 ]; then strand=2; fi
    fi
  else
    # a range needs both ends to be positive, otherwise it is dropped
    seq_start=""
    seq_stop=""
  fi

  # values below 1 are blanked, so AddIfNotEmpty ignores them
  if [ "$strand" -lt 1 ]; then strand=""; fi
  if [ "$complexity" -lt 1 ]; then complexity=""; fi
  if [ "$extrafeat" -lt 1 ]; then extrafeat=""; fi
else
  # sequence-only options are an error for every other database
  if [ "$seq_start" -ne 0 ]
  then
    DisplayError "Only sequence formats may use -seq_start"
    exit 1
  fi
  if [ "$seq_stop" -ne 0 ]
  then
    DisplayError "Only sequence formats may use -seq_stop"
    exit 1
  fi
  if [ "$strand" -ne 0 ]
  then
    DisplayError "Only sequence formats may use -strand"
    exit 1
  fi
  if [ "$complexity" -ne 0 ]
  then
    DisplayError "Only sequence formats may use -complexity"
    exit 1
  fi
  if [ "$extrafeat" -ne -1 ]
  then
    DisplayError "Only sequence formats may use -extrafeat"
    exit 1
  fi
  if [ -n "$showgaps" ]
  then
    DisplayError "Only sequence formats may use -showgaps"
    exit 1
  fi

  # otherwise clear all sequence-related flags, will be ignored by AddIfNotEmpty
  seq_start=""
  seq_stop=""
  strand=""
  complexity=""
  extrafeat=""
  showgaps=""
fi

# determine size of individual requests

# The selector is "format:dbase:mode:isSequence". Arms are tried top to
# bottom and only the first match sets chunk, so specific arms must stay
# above the general ones. "*" also matches ":" characters, which is why
# patterns with fewer fields than the selector still match.

case "$format:$dbase:$mode:$isSequence" in
  uid:pubmed:*      ) chunk=5000  ;;
  uid:*             ) chunk=25000 ;;
  acc:*:true        ) chunk=10000 ;;
  url:*             ) chunk=50    ;;
  docsum:gtr:json:* ) chunk=50    ;;
  docsum:*:json:*   ) chunk=500   ;;
  docsum:*:true     ) chunk=50    ;;
  fasta:*           ) chunk=50    ;;
  bioc:*            ) chunk=100   ;;
  ipg:*             ) chunk=100   ;;
  json:snp:*        ) chunk=10    ;;
  xml:gene:*        ) chunk=200   ;;
  runinfo:sra:*     ) chunk=5     ;;
  *:sra:*           ) chunk=20    ;;
  *:*:true          ) chunk=100   ;;
  *                 ) chunk=1000  ;;
esac

# any non-empty style that survived normalization forces one record per request

if [ "$style" = "master" ] || [ "$style" = "withparts" ] || [ "$style" = "conwithfeat" ]
then
  chunk=1
fi

# an explicit -chunk value overrides everything computed above

if [ "$force" -gt 0 ]
then
  chunk="$force"
fi

# limit uid list between min and max

# Prints the UIDs produced by GetUIDs, one per line, restricted to the
# 1-based line range selected by $min (-start) and $max (-stop):
#   both set   -> lines min through max
#   only min   -> line min through the end
#   only max   -> the first max lines
#   neither    -> everything
# Argument: $1 - database name, stored in $db as before.

LimitUidList() {

  db="$1"

  res=$( GetUIDs )
  if [ "$min" -gt 0 ] && [ "$max" -gt 0 ]
  then
    res=$( echo "$res" | sed -ne "${min},${max}p" )
  elif [ "$min" -gt 0 ]
  then
    # only a lower bound: skip the first min-1 lines
    res=$( echo "$res" | tail -n +"${min}" )
  elif [ "$max" -gt 0 ]
  then
    # only an upper bound: keep the first max lines
    res=$( echo "$res" | head -n "${max}" )
  fi

  echo "$res"
}

# formats that only print the UID list in some form - each arm ends the script

case "$format" in

  # -format uid

  uid )
    res=$( LimitUidList "$dbase" )
    echo "$res"

    exit 0
    ;;

  # -format url (one URL per group of UIDs)

  url )
    res=$( LimitUidList "$dbase" )
    echo "$res" |
    join-into-groups-of "$chunk" |
    while read uids
    do
      echo "https://www.ncbi.nlm.nih.gov/$dbase/$uids"
    done

    exit 0
    ;;

  # -format urls (one URL per UID)

  urls )
    res=$( LimitUidList "$dbase" )
    echo "$res" |
    while read uid
    do
      echo "https://www.ncbi.nlm.nih.gov/$dbase/$uid"
    done

    exit 0
    ;;

  # -format edirect

  xids | xid | xis | edirect )
    # instantiate UIDs within ENTREZ_DIRECT message,
    # starting from the values used for an empty result
    flt=""
    num="0"
    raw=$( LimitUidList "$dbase" )
    if [ -n "$raw" ]
    then
      flt=$( echo "$raw" | sed -e 's/^/  <Id>/' -e 's/$/<\/Id>/' )
      num=$( echo "$raw" | wc -l | tr -d ' ' )
    fi

    echo "<ENTREZ_DIRECT>"
    if [ -n "$dbase" ]; then echo "  <Db>${dbase}</Db>"; fi
    if [ -n "$num" ]; then echo "  <Count>${num}</Count>"; fi
    if [ -n "$flt" ]; then echo "$flt"; fi
    if [ -n "$err" ]; then echo "  <Error>${err}</Error>"; fi
    echo "</ENTREZ_DIRECT>"

    exit 0
    ;;
esac

# -format docsum

# Document summaries come from esummary.fcgi, either for an explicit UID
# list or for a history query, and are then post-processed according to
# the requested output form. This section always ends the script.

if [ "$format" = "docsum" ]
then
  # temporary check for VDB docsum, remove once esummary.fcgi server is modified
  if [ "$isNucleotide" = true ] && [ -n "$ids" ]
  then
    is_vdb=""
    # strip everything from the first dot onward and upper-case the rest
    # NOTE(review): $ids is expanded unquoted here - confirm that is intended
    vacc=$( echo $ids | sed -e 's/\..*//' | LC_ALL=C tr a-z A-Z )
    # classify the accession by its letter/digit layout
    case "$vacc" in
      [HMN][0-9][0-9][0-9][0-9][0-9] | \
      [EHMN][0-9][0-9][0-9][0-9][0-9][0-9][0-9] )
        is_vdb=yes
        ;;
      [CEFGJKOPRSTV][0-9][0-9][0-9][0-9][0-9] | \
      [ACEJKO-X][A-Z][A-Z][0-9][0-9][0-9][0-9][0-9][0-9][0-9] | \
      [A-Z][A-Z][A-Z][A-Z][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]* | \
      [A-Z][A-Z][A-Z][A-Z][0-9][0-9][PS][0-9][0-9][0-9][0-9][0-9][0-9]* | \
      [A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]* | \
      [A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][0-9][0-9][PS][0-9][0-9][0-9][0-9][0-9][0-9][0-9]* )
        is_vdb=maybe
        ;;
     * )
        is_vdb=no
        ;;
    esac
    # "yes" and "maybe" are handled alike: the record is requested from the
    # sviewer flatfile service instead, and the script ends here
    if [ "$is_vdb" = "yes" ] || [  "$is_vdb" = "maybe" ]
    then
      nquire -get https://www.ncbi.nlm.nih.gov/sviewer/flatfile/flatfile.fcgi -id "$vacc" -report 17 | grep '.'
      exit 0
    fi
  fi

  # The output of whichever branch runs below is piped, as a whole, into
  # the post-processing conditional after "fi |". Progress logging goes to
  # stderr, so it never enters that pipe.
  if [ "$needHistory" = false ]
  then
    # explicit UIDs: one esummary request per group of $chunk UIDs
    if [ "$log" = true ]
    then
      printf "EFetch\n" >&2
    fi
    res=$( LimitUidList "$dbase" )
    if [ -n "$res" ]
    then
      echo "$res" |
      join-into-groups-of "$chunk" |
      while read uids
      do
        if [ "$log" = true ]
        then
          printf "." >&2
        fi
        RunWithCommonArgs nquire -url "$base" esummary.fcgi \
          -db "$dbase" -id "$uids" -version "2.0" -retmode "$mode"
      done
    fi
    if [ "$log" = true ]
    then
      printf "\n" >&2
    fi
  else
    # history query: one esummary request per retstart/retmax pair
    # read from GenerateHistoryChunks
    if [ "$log" = true ]
    then
      printf "EFetch\n" >&2
    fi
    GenerateHistoryChunks "$chunk" "$min" "$max" |
    while read fr chnk
    do
      if [ "$log" = true ]
      then
        printf "." >&2
      fi
      RunWithCommonArgs nquire -url "$base" esummary.fcgi \
        -query_key "$qry_key" -WebEnv "$web_env" -retstart "$fr" -retmax "$chnk" \
        -db "$dbase" -version "2.0" -retmode "$mode"
    done
    if [ "$log" = true ]
    then
      printf "\n" >&2
    fi
  fi |
  # post-processing: the first condition that holds picks the filter
  if [ "$mode" = "json" ]
  then
    # server-side JSON: only empty lines are dropped
    grep '.'
  elif [ "$raw" = true ]
  then
    # transmute -mixed -format indent -doctype ""
    grep '.'
  elif [ "$json" = true ]
  then
    # -json flag: XML is converted on the client side
    transmute -x2j
  elif [ "$dbase" = "sra" ]
  then
    # same pipeline as the default below, plus -self on the last step
    transmute -mixed -normalize "$dbase" |
    sed -e 's/<!DOCTYPE eSummaryResult PUBLIC/<!DOCTYPE DocumentSummarySet PUBLIC/g; s/<eSummaryResult>//g; s/<\/eSummaryResult>//g' |
    transmute -mixed -compress -format indent -doctype "" -self
  else
    # default: normalize, rename the DOCTYPE and remove the eSummaryResult
    # wrapper tags, then reformat
    transmute -mixed -normalize "$dbase" |
    sed -e 's/<!DOCTYPE eSummaryResult PUBLIC/<!DOCTYPE DocumentSummarySet PUBLIC/g; s/<eSummaryResult>//g; s/<\/eSummaryResult>//g' |
    transmute -mixed -compress -format indent -doctype ""
  fi

  exit 0
fi

# -format bioc

# BioC records are requested from the PubTator3 export endpoint below,
# and only for the pubmed and pmc databases.

biocbase="https://www.ncbi.nlm.nih.gov/research/pubtator3-api/publications/"
idtype=""
prefix=""
xpoort=""

if [ "$format" = "bioc" ]
then
  # endpoint name, id flag and UID prefix depend on the database
  case "$dbase" in
    pubmed )
      idtype="-pmids"
      xpoort="export"
      ;;
    pmc )
      idtype="-pmcids"
      xpoort="pmc_export"
      prefix="PMC"
      ;;
    * )
      DisplayError "BioC format must use -db pubmed or pmc"
      exit 1
      ;;
  esac

  GetUIDs |
  while read uid
  do
    # drop a leading PMC, if present, before putting the prefix in front
    echo "$prefix${uid#PMC}"
  done |
  join-into-groups-of "$chunk" |
  while read uids
  do
    # NOTE(review): the expansions on this command line are unquoted and
    # therefore subject to word splitting - confirm whether that is intended
    res=$( nquire -get $biocbase $xpoort biocxml $idtype $uids )

    # an empty reply produces no output at all
    [ -n "$res" ] || continue

    if [ "$raw" = true ]
    then
      # transmute -format indent -doctype ""
      echo "$res" | grep '.'
    elif [ "$json" = true ]
    then
      echo "$res" | transmute -x2j
    else
      echo "$res" | transmute -normalize bioc | transmute -format indent -doctype ""
    fi
  done

  exit 0
fi

# helper function adds sequence-specific arguments (if set)

# Arguments: "$@" - the command to run (the callers in this script pass an
# nquire efetch.fcgi command line).
# The body is one single command: each line ends in a continuation, so every
# AddIfNotEmpty / FlagIfNotEmpty call receives all following words as its
# arguments, ending with RunWithCommonArgs and the caller's command.
# NOTE(review): these three helpers are not defined in this file; presumably
# each one appends its flag when the value is set and then runs the rest of
# its arguments - confirm against their definitions.

RunWithFetchArgs() {

  AddIfNotEmpty -style "$style" \
  AddIfNotEmpty -seq_start "$seq_start" \
  AddIfNotEmpty -seq_stop "$seq_stop" \
  AddIfNotEmpty -strand "$strand" \
  AddIfNotEmpty -complexity "$complexity" \
  AddIfNotEmpty -extrafeat "$extrafeat" \
  AddIfNotEmpty -show-gaps "$showgaps" \
  FlagIfNotEmpty -is_variationid "$is_variationid" \
  RunWithCommonArgs "$@"
}

# -immediate flag for full sequence records or binary ASN.1 files

# immediate / express only apply to FASTA output or sequence databases
# NOTE(review): $express is not assigned anywhere else in this file;
# presumably it comes from the common argument handling - confirm

if [ "$isFasta" = false ] && [ "$isSequence" = false ]
then
  immediate=false
  express=false
fi

# immediate implies express

if [ "$immediate" = true ]
then
  express=true
fi

# Express path: records are fetched in very small groups with plain nquire
# calls (RunWithFetchArgs is not used here), and the script ends afterwards.

if [ -n "$format" ] && [ "$express" = true ]
then
  # five UIDs per request, or a single one for immediate and ASN.1 fetches
  chunk=5
  if [ "$immediate" = true ] || [ "$isBinaryASN" = true ] || [ "$isTextASN" = true ]
  then
    chunk=1
  fi
  GetUIDs |
  join-into-groups-of "$chunk" |
  while read uid
  do
    # the first condition that holds decides how the group is fetched
    if [ -n "$style" ]
    then
      nquire -url "$base" efetch.fcgi \
        -db "$dbase" -id "$uid" -rettype "$format" -retmode "$mode" -style "$style"
    elif [ "$isBinaryASN" = true ]
    then
      # binary output goes through a temporary file, so its last byte
      # can be inspected before anything is written to stdout
      temp=$(mktemp /tmp/FETCH_BIN_TEMP.XXXXXXXXX)
      nquire -url "$base" efetch.fcgi \
        -db "$dbase" -id "$uid" -rettype "$format" -retmode "$mode" > "$temp"

      # non-empty when the hex dump of the final byte shows 0a (a newline)
      inhex=$( tail -c1 "$temp" | od -tx1 | tr '\n' ' ' | grep -i "0a" )
      if [ "$inhex" ]
      then
        # copy everything except that final byte, as one block of flen-1 bytes
        flen=$( wc -c < "$temp" | tr -d ' ' )
        count=$(( flen - 1 ))
        dd if="$temp" bs="$count" count=1 2>/dev/null
      else
        cat "$temp"
      fi

      rm "$temp"
    elif [ "$isTextASN" = true ]
    then
      # text ASN.1: empty lines are removed
      nquire -url "$base" efetch.fcgi \
        -db "$dbase" -id "$uid" -rettype "$format" -retmode "$mode" | grep -v "^$"
    else
      nquire -url "$base" efetch.fcgi \
        -db "$dbase" -id "$uid" -rettype "$format" -retmode "$mode"
    fi
  done

  exit 0
fi

# other -format choices

# General path: records are requested from efetch.fcgi through
# RunWithFetchArgs, either for an explicit UID list or for a history query.
# The combined output of whichever branch runs is piped into the
# post-processing conditional after "fi |".

if [ -n "$format" ]
then
  if [ "$needHistory" = false ]
  then
    # explicit UIDs: one request per group of $chunk UIDs
    res=$( LimitUidList "$dbase" )
    if [ -n "$res" ]
    then
      echo "$res" |
      join-into-groups-of "$chunk" |
      while read uids
      do
        RunWithFetchArgs nquire -url "$base" efetch.fcgi \
          -db "$dbase" -id "$uids" -rettype "$format" -retmode "$mode"
      done
    fi
  else
    # history query: one request per retstart/retmax pair
    # read from GenerateHistoryChunks
    GenerateHistoryChunks "$chunk" "$min" "$max" |
    while read fr chnk
    do
      RunWithFetchArgs nquire -url "$base" efetch.fcgi \
        -query_key "$qry_key" -WebEnv "$web_env" -retstart "$fr" -retmax "$chnk" \
        -db "$dbase" -rettype "$format" -retmode "$mode"
    done
  fi |
  # post-processing: the first condition that holds picks the filter
  if [ "$format" = "json" ] || [ "$mode" = "json" ] || [ "$raw" = true ] ||  [ "$isTextASN" = true ]
  then
    # pass through, dropping empty lines
    grep '.'
  elif [ "$json" = true ]
  then
    # -json flag: XML is converted on the client side
    transmute -x2j
  elif [ "$isFasta" = true ]
  then
    # FASTA: drop empty lines
    grep '.'
  elif [ "$format" = "full" ] && [ "$mode" = "xml" ]
  then
    # full XML records get database-specific clean-up
    if [ "$dbase" = "pubmed" ]
    then
      if [ "$pubmedAPA" = true ]
      then
         # -format apa was requested
         transmute -normalize "$dbase" |
         pma2apa -ascii
      elif  [ "$pubmedASN" = true ]
      then
         # one of the asn formats was requested
         transmute -normalize "$dbase" |
         pma2pme -std
      else
        # first pass to convert SOLR server's &#x...; hex encodings with html.UnescapeString
        transmute -normalize "$dbase" |
        transmute -normalize "$dbase" |
        transmute -format indent -combine -doctype ""
      fi
    elif [ "$dbase" = "sra" ]
    then
      transmute -normalize "$dbase" |
      transmute -format indent -combine -doctype "" -self
    elif [ "$dbase" = "pmc" ]
    then
      # pmc XML is only stripped of empty lines
      grep '.'
    elif [ "$dbase" = "pccompound" ] || [ "$dbase" = "pcsubstance" ]
    then
      transmute -mixed -normalize "$dbase"
    else
      transmute -format indent -combine -doctype ""
    fi
  elif [ "$dbase" = "sra" ] && [ "$format" = "runinfo" ]
  then
    # remove extra copies of the header between batches of results
    sed '2,${/Run,ReleaseDate,LoadDate/d;}'
  else
    # the empty pattern matches every line, so nothing is filtered out
    grep ''
  fi

  exit 0
fi

# warn if no format recognized
# (reached only with an empty format, which was defaulted to "native"
# earlier in the script)

DisplayError "Unrecognized format"
exit 1

