{
  "_id": "6a1f31c9b401979e73427d49",
  "Package": "rtika",
  "Type": "Package",
  "Title": "R Interface to 'Apache Tika'",
  "Version": "3.2.3",
  "Authors@R": "c(\nperson(given = \"Sasha\", family = \"Goodman\", email=\"goodmansasha@gmail.com\", role=c(\"aut\",\"cre\") ),\nperson(given = \"The Apache Software Foundation\", role=c(\"aut\",\"cph\") ),\nperson(given = \"Julia\", family = \"Silge\", role = c(\"rev\"), comment = \"Reviewed the package for rOpenSci, see https://github.com/ropensci/software-review/issues/191/\"),\nperson(given = \"David\",family = \"Gohel\", role = c(\"rev\"), comment = \"Reviewed the package for rOpenSci, see https://github.com/ropensci/software-review/issues/191/\")\n)",
  "Maintainer": "Sasha Goodman <goodmansasha@gmail.com>",
  "License": "Apache License 2.0 | file LICENSE",
  "SystemRequirements": "Java (>=11)",
  "Description": "Extract text or metadata from over a thousand file types,\nusing Apache Tika <https://tika.apache.org/>. Get either plain\ntext or structured XHTML content.",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.3.3",
  "URL": "https://docs.ropensci.org/rtika/,\nhttps://github.com/ropensci/rtika/",
  "BugReports": "https://github.com/ropensci/rtika/issues/",
  "VignetteBuilder": "knitr",
  "Config/pak/sysreqs": "default-jdk libssl-dev",
  "Repository": "https://ropensci.r-universe.dev",
  "Date/Publication": "2025-10-12 13:59:05 UTC",
  "RemoteUrl": "https://github.com/ropensci/rtika",
  "RemoteRef": "master",
  "RemoteSha": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-15 08:16:00 UTC",
    "User": "root"
  },
  "Author": "Sasha Goodman [aut, cre],\nThe Apache Software Foundation [aut, cph],\nJulia Silge [rev] (Reviewed the package for rOpenSci, see\nhttps://github.com/ropensci/software-review/issues/191/),\nDavid Gohel [rev] (Reviewed the package for rOpenSci, see\nhttps://github.com/ropensci/software-review/issues/191/)",
  "MD5sum": "78def7f8e0da85ca63c418c750c4a992",
  "_user": "ropensci",
  "_type": "src",
  "_file": "rtika_3.2.3.tar.gz",
  "_fileid": "23596a34e2e7b199426869aed559d2dd5740adb2495eb7659ebd6886e32a42b6",
  "_filesize": 838344,
  "_sha256": "23596a34e2e7b199426869aed559d2dd5740adb2495eb7659ebd6886e32a42b6",
  "_created": "2026-05-15T08:16:00.000Z",
  "_published": "2026-06-02T19:40:57.516Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 79156817967,
      "time": 137,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7012926426"
    },
    {
      "job": 79156818140,
      "time": 128,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7012924593"
    },
    {
      "job": 79156818442,
      "time": 88,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7012915828"
    },
    {
      "job": 79156818228,
      "time": 77,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7012913550"
    },
    {
      "job": 79156817647,
      "time": 163,
      "config": "pkgdown",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7012896508"
    },
    {
      "job": 79156817830,
      "time": 168,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7012897801"
    },
    {
      "job": 79156817614,
      "time": 118,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7368020991"
    },
    {
      "job": 79156818163,
      "time": 80,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7012914241"
    },
    {
      "job": 79156818336,
      "time": 69,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7012912156"
    },
    {
      "job": 79156818521,
      "time": 83,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7012915028"
    }
  ],
  "_buildurl": "https://github.com/r-universe/ropensci/actions/runs/25907572695",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/ropensci/rtika",
  "_commit": {
    "id": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
    "author": "Sasha Goodman Chase <4193801+soshsquatch@users.noreply.github.com>",
    "committer": "Sasha Goodman Chase <4193801+soshsquatch@users.noreply.github.com>",
    "message": "fixed tabulapdf link\n\nsmall fix, refreshed docs with pkgdown link to tabulapdf\n",
    "time": 1760277545
  },
  "_maintainer": {
    "name": "Sasha Goodman",
    "email": "goodmansasha@gmail.com",
    "login": "soshsquatch",
    "uuid": 4193801
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.5.0",
      "role": "Depends"
    },
    {
      "package": "curl",
      "role": "Imports"
    },
    {
      "package": "sys",
      "version": ">= 2.1",
      "role": "Imports"
    },
    {
      "package": "stats",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "digest",
      "role": "Imports"
    },
    {
      "package": "backports",
      "role": "Imports"
    },
    {
      "package": "jsonlite",
      "role": "Suggests"
    },
    {
      "package": "xml2",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "covr",
      "role": "Suggests"
    },
    {
      "package": "magrittr",
      "role": "Suggests"
    }
  ],
  "_owner": "ropensci",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2025-41",
      "n": 5
    }
  ],
  "_tags": [],
  "_topics": [
    "extract-metadata",
    "extract-text",
    "java",
    "parse",
    "pdf-files",
    "peer-reviewed",
    "tesseract",
    "tika"
  ],
  "_stars": 55,
  "_contributors": [
    {
      "user": "soshsquatch",
      "count": 158,
      "uuid": 4193801
    },
    {
      "user": "jeroen",
      "count": 5,
      "uuid": 216319
    },
    {
      "user": "noamross",
      "count": 4,
      "uuid": 571752
    },
    {
      "user": "juliasilge",
      "count": 1,
      "uuid": 12505835
    }
  ],
  "_userbio": {
    "uuid": 1200269,
    "type": "organization",
    "name": "rOpenSci",
    "description": "Tools and R Packages for Open Science"
  },
  "_downloads": {
    "count": 220,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/rtika"
  },
  "_devurl": "https://github.com/ropensci/rtika",
  "_pkgdown": "https://docs.ropensci.org/rtika/",
  "_searchresults": 12,
  "_metadata": {
    "review": {
      "id": 191,
      "status": "reviewed",
      "version": "0.1.2",
      "organization": "rOpenSci Software Review",
      "url": "https://github.com/ropensci/software-review/issues/191"
    },
    "ropensci_category": "literature"
  },
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "extra/rtika.html",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/ropensci/rtika",
  "_realowner": "ropensci",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1.8",
      "date": "2018-05-02"
    },
    {
      "version": "1.1.19",
      "date": "2018-10-05"
    },
    {
      "version": "1.19.1",
      "date": "2018-11-15"
    },
    {
      "version": "1.20",
      "date": "2019-03-04"
    },
    {
      "version": "1.21",
      "date": "2019-06-22"
    },
    {
      "version": "1.22",
      "date": "2019-08-02"
    },
    {
      "version": "1.23",
      "date": "2019-12-13"
    },
    {
      "version": "1.24.1",
      "date": "2020-04-25"
    },
    {
      "version": "2.0.0",
      "date": "2021-08-06"
    },
    {
      "version": "2.4.1",
      "date": "2022-09-26"
    },
    {
      "version": "2.7.0",
      "date": "2023-05-05"
    },
    {
      "version": "3.2.3",
      "date": "2025-10-12"
    }
  ],
  "_exports": [
    "install_tika",
    "java",
    "tika",
    "tika_check",
    "tika_fetch",
    "tika_html",
    "tika_jar",
    "tika_json",
    "tika_json_text",
    "tika_text",
    "tika_xml"
  ],
  "_help": [
    {
      "page": "install_tika",
      "title": "Install or Update the Apache Tika 'jar'",
      "topics": [
        "install_tika"
      ]
    },
    {
      "page": "java",
      "title": "System Command to Run Java",
      "topics": [
        "java"
      ]
    },
    {
      "page": "tika",
      "title": "Main R Interface to 'Apache Tika'",
      "topics": [
        "tika"
      ]
    },
    {
      "page": "tika_check",
      "title": "Check Tika against a checksum",
      "topics": [
        "tika_check"
      ]
    },
    {
      "page": "tika_fetch",
      "title": "Fetch Files with the Content-Type Preserved in the File Extension",
      "topics": [
        "tika_fetch"
      ]
    },
    {
      "page": "tika_html",
      "title": "Get Structured XHTML",
      "topics": [
        "tika_html"
      ]
    },
    {
      "page": "tika_jar",
      "title": "Path to Apache Tika",
      "topics": [
        "tika_jar"
      ]
    },
    {
      "page": "tika_json",
      "title": "Get json Metadata and XHTML Content",
      "topics": [
        "tika_json"
      ]
    },
    {
      "page": "tika_json_text",
      "title": "Get json Metadata and Plain Text Content",
      "topics": [
        "tika_json_text"
      ]
    },
    {
      "page": "tika_text",
      "title": "Get Plain Text",
      "topics": [
        "tika_text"
      ]
    },
    {
      "page": "tika_xml",
      "title": "Get a Structured XHTML Rendition",
      "topics": [
        "tika_xml"
      ]
    }
  ],
  "_readme": "https://github.com/ropensci/rtika/raw/master/README.md",
  "_rundeps": [
    "backports",
    "curl",
    "digest",
    "sys"
  ],
  "_vignettes": [
    {
      "source": "rtika_introduction.Rmd",
      "filename": "rtika_introduction.html",
      "title": "Introduction to rtika",
      "author": "Sasha Goodman",
      "engine": "knitr::rmarkdown",
      "headings": [
        "A Digital Babel Fish",
        "Extract Plain Text",
        "Preserve Content-Type when Downloading",
        "Settings for Big Datasets",
        "Get a Structured XHTML Rendition",
        "Access Metadata in the XHTML",
        "Get Metadata in JSON Format",
        "Get Metadata from \"Container\" Documents",
        "Extending rtika",
        "References"
      ],
      "created": "2018-03-02 03:09:08",
      "modified": "2019-08-02 05:39:32",
      "commits": 19
    }
  ],
  "_score": 5.99563519459755,
  "_indexed": true,
  "_nocasepkg": "rtika",
  "_universes": [
    "ropensci",
    "soshsquatch"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "3.2.3",
      "date": "2026-05-15T08:18:16.000Z",
      "distro": "noble",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "d91144f94d17a50ce65d305d2378865b2ccf83e5f6669631324abfdc9d0b75b8",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25907572695"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "3.2.3",
      "date": "2026-05-15T08:18:09.000Z",
      "distro": "noble",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "7a03fe46d9d739db88b992c367588333b725993216ce6fcf36c213d2106feea8",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25907572695"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "3.2.3",
      "date": "2026-05-15T08:17:32.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "3984332fc6cb08e21408694f881ce4aa2add8e1dbb8f1f398e2ed2d7b82610c8",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25907572695"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "3.2.3",
      "date": "2026-05-15T08:17:21.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "8ca1844fd620b6ac8517ed6407a058444fc4b995bb43022534f380e0c073aa51",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25907572695"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "3.2.3",
      "date": "2026-05-15T08:17:13.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "538c381e115ccc5ea5aeeca24965d05896a40a20c46eea806af374b18fe86db5",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25907572695"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "3.2.3",
      "date": "2026-05-15T08:17:04.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "f73d30e01f280ba90f2f3f73be67d09029f08d27d67481b64c1034dd2a14dddd",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25907572695"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "3.2.3",
      "date": "2026-05-15T08:17:18.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "409de21b6325d3c81cc5e3ff89970bc572b77a5d9a5e9ac74efd92fb7c94c671",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25907572695"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "3.2.3",
      "date": "2026-06-02T19:40:36.000Z",
      "commit": "350fcc3e90ab83dad8770021d6258147cf0c2ff0",
      "fileid": "c9141af9a6d77861c6ca097b05e045dd2f49221e13145a5d5db03ff75ebdad80",
      "status": "success",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25907572695"
    }
  ]
}