Compound-to-Assay Workflow With pc_* API

This vignette demonstrates an end-to-end compound-to-assay flow:

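  1. Map compound names to PubChem compound identifiers (CIDs).
  2. Retrieve the assay identifiers (AIDs) associated with those CIDs.
  3. Pull the assay summaries for a selection of those AIDs.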
library(PubChemR)
library(dplyr)
library(tibble)

Step 1: Name to CID

# Request a name -> CID mapping for three compound names in a single call.
# `cache = TRUE` is passed straight through to pc_identifier_map(); what is
# cached, and for how long, is defined by the package rather than here.
cid_map <- pc_identifier_map(
  identifier = c("aspirin", "ibuprofen", "caffeine"),
  namespace = "name",
  to = "cids",
  domain = "compound",
  cache = TRUE
)

# Print the mapping result as a tibble.
# NOTE(review): the output recorded below is a single row with
# success = FALSE, status = 404 and CID = NA, i.e. this multi-name request
# returned no CIDs. Confirm whether pc_identifier_map() accepts several
# names per call or whether each name has to be looked up separately.
as_tibble(cid_map)
#> # A tibble: 1 × 6
#>   success status from_cache pending data             CID  
#>   <lgl>    <int> <lgl>      <lgl>   <I<list>>        <chr>
#> 1 FALSE      404 FALSE      FALSE   <named list [1]> <NA>

Step 2: CID to AID

# Request the assay identifiers (AIDs) linked to three compound identifiers.
# The CIDs are written out literally here; they are not read from `cid_map`.
# NOTE(review): 2244, 3672 and 2519 look like the PubChem CIDs for aspirin,
# ibuprofen and caffeine (the names used in Step 1) -- confirm.
aid_map <- pc_identifier_map(
  identifier = c(2244, 3672, 2519),
  namespace = "cid",
  to = "aids",
  domain = "compound",
  cache = TRUE
)

# Keep the tibble form in `aid_tbl`; Step 3 reads its AID column.
aid_tbl <- as_tibble(aid_map)
# Auto-print: the recorded output lists one CID and one AID on each row.
aid_tbl
#> # A tibble: 9,258 × 6
#>    success status from_cache pending   CID   AID
#>    <lgl>    <int> <lgl>      <lgl>   <dbl> <dbl>
#>  1 TRUE       200 FALSE      FALSE    2244     1
#>  2 TRUE       200 FALSE      FALSE    2244     3
#>  3 TRUE       200 FALSE      FALSE    2244     9
#>  4 TRUE       200 FALSE      FALSE    2244    15
#>  5 TRUE       200 FALSE      FALSE    2244    19
#>  6 TRUE       200 FALSE      FALSE    2244    21
#>  7 TRUE       200 FALSE      FALSE    2244    23
#>  8 TRUE       200 FALSE      FALSE    2244    25
#>  9 TRUE       200 FALSE      FALSE    2244    29
#> 10 TRUE       200 FALSE      FALSE    2244    31
#> # ℹ 9,248 more rows

Step 3: Assay summary retrieval

# Demo subset: flatten the AID column, discard missing values and repeats,
# then keep only the first ten distinct AIDs.
sel_aids <- aid_tbl$AID %>%
  unlist() %>%
  na.omit() %>%
  unique() %>%
  head(n = 10)

# Request the "summary" operation for the selected AIDs in one call.
assay_res <- pc_assay(
  identifier = sel_aids,
  namespace = "aid",
  operation = "summary",
  cache = TRUE
)

# Print the result as a tibble. The recorded output is a single row whose
# `data` list-column holds a named list of length 1.
# NOTE(review): presumably the per-assay summaries are nested inside `data`
# rather than spread across columns -- inspect it to confirm.
as_tibble(assay_res)
#> # A tibble: 1 × 5
#>   success status from_cache pending data            
#>   <lgl>    <int> <lgl>      <lgl>   <I<list>>       
#> 1 TRUE       200 FALSE      FALSE   <named list [1]>

The same three-step pattern can be scaled up to larger pipelines by using pc_batch() to improve throughput.