{
  "_id": "6a102754acfb0bcc41c90097",
  "Type": "Package",
  "Package": "pangoling",
  "Title": "Access to Large Language Model Predictions",
  "Version": "1.0.3",
  "Authors@R": "c(\nperson(\"Bruno\", \"Nicenboim\", , \"b.nicenboim@tilburguniversity.edu\", role = c(\"aut\", \"cre\"),\ncomment = c(ORCID = \"0000-0002-5176-3943\")),\nperson(\"Chris\", \"Emmerly\", role = \"ctb\"),\nperson(\"Giovanni\", \"Cassani\", role = \"ctb\"),\nperson(\"Lisa\", \"Levinson\", role = \"rev\"),\nperson(\"Utku\", \"Turk\", role = \"rev\")\n)",
  "Description": "Provides access to word predictability estimates using\nlarge language models (LLMs) based on 'transformer'\narchitectures via integration with the 'Hugging Face' ecosystem\n<https://huggingface.co/>. The package interfaces with\npre-trained neural networks and supports both\ncausal/auto-regressive LLMs (e.g., 'GPT-2') and\nmasked/bidirectional LLMs (e.g., 'BERT') to compute the\nprobability of words, phrases, or tokens given their linguistic\ncontext. For details on GPT-2 and causal models, see Radford et\nal. (2019)\n<https://storage.prod.researchhub.com/uploads/papers/2020/06/01/language-models.pdf>,\nfor details on BERT and masked models, see Devlin et al. (2019)\n<doi:10.48550/arXiv.1810.04805>. By enabling a straightforward\nestimation of word predictability, the package facilitates\nresearch in psycholinguistics, computational linguistics, and\nnatural language processing (NLP).",
  "License": "MIT + file LICENSE",
  "URL": "https://docs.ropensci.org/pangoling/,\nhttps://github.com/ropensci/pangoling",
  "BugReports": "https://github.com/ropensci/pangoling/issues",
  "Config/testthat/edition": "3",
  "Encoding": "UTF-8",
  "Language": "en-US",
  "LazyData": "true",
  "Roxygen": "list(markdown = TRUE)",
  "RoxygenNote": "7.3.1",
  "StagedInstall": "yes",
  "VignetteBuilder": "knitr",
  "Config/pak/sysreqs": "libpng-dev python3",
  "Repository": "https://ropensci.r-universe.dev",
  "Date/Publication": "2026-01-13 15:59:45 UTC",
  "RemoteUrl": "https://github.com/ropensci/pangoling",
  "RemoteRef": "main",
  "RemoteSha": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-13 08:42:31 UTC",
    "User": "root"
  },
  "Author": "Bruno Nicenboim [aut, cre] (ORCID:\n<https://orcid.org/0000-0002-5176-3943>),\nChris Emmerly [ctb],\nGiovanni Cassani [ctb],\nLisa Levinson [rev],\nUtku Turk [rev]",
  "Maintainer": "Bruno Nicenboim <b.nicenboim@tilburguniversity.edu>",
  "MD5sum": "6940c1240c1f57ec445b9d9bd2be73c2",
  "_user": "ropensci",
  "_type": "src",
  "_file": "pangoling_1.0.3.tar.gz",
  "_fileid": "95151d8209dc4f7ea9e0fa14a7636334155caeb74a90fcc42903b75ebbd49ae8",
  "_filesize": 1000042,
  "_sha256": "95151d8209dc4f7ea9e0fa14a7636334155caeb74a90fcc42903b75ebbd49ae8",
  "_created": "2026-05-13T08:42:31.000Z",
  "_published": "2026-05-22T09:52:20.457Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 77356215517,
      "time": 203,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6965517941"
    },
    {
      "job": 77356215684,
      "time": 202,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6965517305"
    },
    {
      "job": 77356215364,
      "time": 101,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6965485057"
    },
    {
      "job": 77356215436,
      "time": 151,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6965500868"
    },
    {
      "job": 77356215111,
      "time": 173,
      "config": "pkgdown",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6965425474"
    },
    {
      "job": 77356215024,
      "time": 251,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6965450640"
    },
    {
      "job": 77356214857,
      "time": 157,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7157698197"
    },
    {
      "job": 77356215382,
      "time": 212,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "6965520994"
    },
    {
      "job": 77356215696,
      "time": 309,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "6965551316"
    },
    {
      "job": 77356215670,
      "time": 195,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6965515241"
    }
  ],
  "_buildurl": "https://github.com/r-universe/ropensci/actions/runs/25788087842",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/ropensci/pangoling",
  "_commit": {
    "id": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
    "author": "Bruno Nicenboim <bruno.nicenboim@gmail.com>",
    "committer": "GitHub <noreply@github.com>",
    "message": "fix DOI",
    "time": 1768319985
  },
  "_maintainer": {
    "name": "Bruno Nicenboim",
    "email": "b.nicenboim@tilburguniversity.edu",
    "login": "bnicenboim",
    "orcid": "0000-0002-5176-3943",
    "twitter": "@bruno_nicenboim",
    "uuid": 5982330
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.1.0",
      "role": "Depends"
    },
    {
      "package": "cachem",
      "role": "Imports"
    },
    {
      "package": "data.table",
      "role": "Imports"
    },
    {
      "package": "memoise",
      "role": "Imports"
    },
    {
      "package": "reticulate",
      "role": "Imports"
    },
    {
      "package": "rstudioapi",
      "role": "Imports"
    },
    {
      "package": "stats",
      "role": "Imports"
    },
    {
      "package": "tidyselect",
      "role": "Imports"
    },
    {
      "package": "tidytable",
      "version": ">= 0.7.2",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "brms",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "parallel",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "spelling",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    },
    {
      "package": "tictoc",
      "role": "Suggests"
    },
    {
      "package": "covr",
      "role": "Suggests"
    }
  ],
  "_owner": "ropensci",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2026-03",
      "n": 1
    }
  ],
  "_tags": [],
  "_topics": [
    "nlp",
    "psycholinguistics",
    "transformers"
  ],
  "_stars": 12,
  "_contributors": [
    {
      "user": "bnicenboim",
      "count": 269,
      "uuid": 5982330
    }
  ],
  "_userbio": {
    "uuid": 1200269,
    "type": "organization",
    "name": "rOpenSci",
    "description": "Tools and R Packages for Open Science"
  },
  "_downloads": {
    "count": 592,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/pangoling"
  },
  "_devurl": "https://github.com/ropensci/pangoling",
  "_pkgdown": "https://docs.ropensci.org/pangoling/",
  "_searchresults": 16,
  "_metadata": {
    "review": {
      "id": 575,
      "status": "reviewed",
      "version": "0.0.0.9005",
      "organization": "rOpenSci Software Review",
      "url": "https://github.com/ropensci/software-review/issues/575"
    },
    "ropensci_category": "scalereprod"
  },
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/pangoling.html",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/ropensci/pangoling",
  "_realowner": "ropensci",
  "_cranurl": true,
  "_releases": [
    {
      "version": "1.0.3",
      "date": "2025-04-07"
    }
  ],
  "_exports": [
    "causal_config",
    "causal_lp",
    "causal_lp_mats",
    "causal_next_tokens_pred_tbl",
    "causal_next_tokens_tbl",
    "causal_pred_mats",
    "causal_preload",
    "causal_targets_pred",
    "causal_tokens_lp_tbl",
    "causal_tokens_pred_lst",
    "causal_words_pred",
    "install_py_pangoling",
    "installed_py_pangoling",
    "masked_config",
    "masked_lp",
    "masked_preload",
    "masked_targets_pred",
    "masked_tokens_pred_tbl",
    "masked_tokens_tbl",
    "ntokens",
    "perplexity_calc",
    "set_cache_folder",
    "tokenize_lst",
    "transformer_vocab"
  ],
  "_datasets": [
    {
      "name": "df_jaeger14",
      "title": "Self-Paced Reading Dataset on Chinese Relative Clauses",
      "object": "df_jaeger14",
      "class": [
        "tidytable",
        "tbl",
        "data.table",
        "data.frame"
      ],
      "fields": [
        "subject",
        "item",
        "cond",
        "word",
        "wordn",
        "rt",
        "region",
        "question",
        "accuracy",
        "correct_answer",
        "question_type",
        "experiment",
        "list",
        "sentence"
      ],
      "rows": 8624,
      "table": true,
      "tojson": true
    },
    {
      "name": "df_sent",
      "title": "Example dataset: Two word-by-word sentences",
      "object": "df_sent",
      "class": [
        "tidytable",
        "tbl",
        "data.table",
        "data.frame"
      ],
      "fields": [
        "sent_n",
        "word"
      ],
      "rows": 15,
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "causal_config",
      "title": "Returns the configuration of a causal model",
      "concept": [
        "causal model helper functions"
      ],
      "topics": [
        "causal_config"
      ]
    },
    {
      "page": "causal_next_tokens_pred_tbl",
      "title": "Generate next tokens after a context and their predictability using a causal transformer model",
      "concept": [
        "causal model functions"
      ],
      "topics": [
        "causal_next_tokens_pred_tbl"
      ]
    },
    {
      "page": "causal_pred_mats",
      "title": "Generate a list of predictability matrices using a causal transformer model",
      "concept": [
        "causal model functions"
      ],
      "topics": [
        "causal_pred_mats"
      ]
    },
    {
      "page": "causal_preload",
      "title": "Preloads a causal language model",
      "concept": [
        "causal model helper functions"
      ],
      "topics": [
        "causal_preload"
      ]
    },
    {
      "page": "causal_predictability",
      "title": "Compute predictability using a causal transformer model",
      "concept": [
        "causal model functions"
      ],
      "topics": [
        "causal_targets_pred",
        "causal_tokens_pred_lst",
        "causal_words_pred"
      ]
    },
    {
      "page": "df_jaeger14",
      "title": "Self-Paced Reading Dataset on Chinese Relative Clauses",
      "concept": [
        "datasets"
      ],
      "topics": [
        "df_jaeger14"
      ]
    },
    {
      "page": "df_sent",
      "title": "Example dataset: Two word-by-word sentences",
      "concept": [
        "datasets"
      ],
      "topics": [
        "df_sent"
      ]
    },
    {
      "page": "install_py_pangoling",
      "title": "Install the Python packages needed for 'pangoling'",
      "concept": [
        "helper functions"
      ],
      "topics": [
        "install_py_pangoling"
      ]
    },
    {
      "page": "installed_py_pangoling",
      "title": "Check if the required Python dependencies for 'pangoling' are installed",
      "concept": [
        "helper functions"
      ],
      "topics": [
        "installed_py_pangoling"
      ]
    },
    {
      "page": "masked_config",
      "title": "Returns the configuration of a masked model",
      "concept": [
        "masked model helper functions"
      ],
      "topics": [
        "masked_config"
      ]
    },
    {
      "page": "masked_preload",
      "title": "Preloads a masked language model",
      "concept": [
        "masked model helper functions"
      ],
      "topics": [
        "masked_preload"
      ]
    },
    {
      "page": "masked_targets_pred",
      "title": "Get the predictability of a target word (or phrase) given a left and right context",
      "concept": [
        "masked model functions"
      ],
      "topics": [
        "masked_targets_pred"
      ]
    },
    {
      "page": "masked_tokens_pred_tbl",
      "title": "Get the possible tokens and their log probabilities for each mask in a sentence",
      "concept": [
        "masked model functions"
      ],
      "topics": [
        "masked_tokens_pred_tbl"
      ]
    },
    {
      "page": "ntokens",
      "title": "The number of tokens in a string or vector of strings",
      "concept": [
        "token-related functions"
      ],
      "topics": [
        "ntokens"
      ]
    },
    {
      "page": "perplexity_calc",
      "title": "Calculates perplexity",
      "concept": [
        "general functions"
      ],
      "topics": [
        "perplexity_calc"
      ]
    },
    {
      "page": "set_cache_folder",
      "title": "Set cache folder for HuggingFace transformers",
      "concept": [
        "helper functions"
      ],
      "topics": [
        "set_cache_folder"
      ]
    },
    {
      "page": "tokenize_lst",
      "title": "Tokenize an input",
      "concept": [
        "token-related functions"
      ],
      "topics": [
        "tokenize_lst"
      ]
    },
    {
      "page": "transformer_vocab",
      "title": "Returns the vocabulary of a model",
      "concept": [
        "token-related functions"
      ],
      "topics": [
        "transformer_vocab"
      ]
    }
  ],
  "_pkglogo": "https://github.com/ropensci/pangoling/raw/main/man/figures/logo.png",
  "_readme": "https://github.com/ropensci/pangoling/raw/main/README.md",
  "_rundeps": [
    "cachem",
    "cli",
    "data.table",
    "fastmap",
    "glue",
    "here",
    "jsonlite",
    "lattice",
    "lifecycle",
    "magrittr",
    "Matrix",
    "memoise",
    "pillar",
    "png",
    "rappdirs",
    "Rcpp",
    "RcppTOML",
    "reticulate",
    "rlang",
    "rprojroot",
    "rstudioapi",
    "tidyselect",
    "tidytable",
    "utf8",
    "vctrs",
    "withr"
  ],
  "_vignettes": [
    {
      "source": "troubleshooting.Rmd",
      "filename": "troubleshooting.html",
      "title": "Troubleshooting the use of Python in R",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Module not found error in Rstudio",
        "HTTPSConnectionPool error"
      ],
      "created": "2025-03-11 08:53:06",
      "modified": "2025-03-11 08:53:06",
      "commits": 1
    },
    {
      "source": "intro-bert.Rmd",
      "filename": "intro-bert.html",
      "title": "Using a Bert model to get the predictability of words in their context",
      "engine": "knitr::rmarkdown",
      "headings": [
        "References"
      ],
      "created": "2025-03-11 08:53:06",
      "modified": "2025-03-11 08:53:06",
      "commits": 1
    },
    {
      "source": "intro-gpt2.Rmd",
      "filename": "intro-gpt2.html",
      "title": "Using a GPT2 transformer model to get word predictability",
      "engine": "knitr::rmarkdown",
      "headings": [
        "References"
      ],
      "created": "2025-03-11 08:53:06",
      "modified": "2025-03-11 14:01:55",
      "commits": 2
    },
    {
      "source": "example.Rmd",
      "filename": "example.html",
      "title": "Worked-out example: Surprisal from a causal (GPT) model as a cognitive processing bottleneck in reading",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Data analysis",
        "0. Preprocessing",
        "1. Add surprisal values to the dataset",
        "2. Analyze the dataset with a Bayesian hierarchical model",
        "Conclusion",
        "References"
      ],
      "created": "2025-03-11 08:53:06",
      "modified": "2025-03-11 20:22:12",
      "commits": 2
    }
  ],
  "_score": 5.584331224367531,
  "_indexed": true,
  "_nocasepkg": "pangoling",
  "_universes": [
    "ropensci",
    "bnicenboim"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "1.0.3",
      "date": "2026-05-13T08:45:35.000Z",
      "distro": "noble",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "86de3f8f59c0a780abecc5111cf6d84c16345b9265aebab4cb1e66a3d9062d8b",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25788087842"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "1.0.3",
      "date": "2026-05-13T08:45:31.000Z",
      "distro": "noble",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "cf6cf9faabe31b7e92146bcf7f0dcba0df15e34d93afac1790f8ff0209ea4f31",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25788087842"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "1.0.3",
      "date": "2026-05-13T08:44:04.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "e892f6726e6f51c8559f9e4c443e85c3fa2ecffb72d3ce0e001f1f54d4483ea4",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25788087842"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "1.0.3",
      "date": "2026-05-13T08:44:36.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "9c693832847e23e169bfd09e824a437a1d39f019a651184458660d9731952ca8",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25788087842"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "1.0.3",
      "date": "2026-05-13T08:44:17.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "de955fc27894c38e5fc258625670f4a6029b2ad0a4284d754c3d8120d8e13c15",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25788087842"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "1.0.3",
      "date": "2026-05-13T08:44:29.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "928e339ce452de0fbcfebcf883752cd2501bf592b982cc3fe24571cd10c376d9",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25788087842"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "1.0.3",
      "date": "2026-05-13T08:44:10.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "e3778c07e94ed1fd4fa09084e8ea0cdadb438e052dd914545d217369aa7a373b",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25788087842"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "1.0.3",
      "date": "2026-05-22T09:51:57.000Z",
      "commit": "39916805ecb60c144f6a3bc531bfb7ff539f32d8",
      "fileid": "b719c406f062026e504b1e5470a8e6abd00bc60a7c495cb6f7eb709b37aaa7a8",
      "status": "success",
      "buildurl": "https://github.com/r-universe/ropensci/actions/runs/25788087842"
    }
  ]
}