## -----------------------------------------------------------------------------
#| eval: false
# install.packages("BiocManager")
# BiocManager::install("SpectriPy")


## -----------------------------------------------------------------------------
#| label: libraries
#| message: false
#' R session:

library(Spectra)
library(SpectriPy)


## -----------------------------------------------------------------------------
stopifnot(require("MsDataHub"))


## -----------------------------------------------------------------------------
#| label: load-data
#| message: false
#' R session:

#' Loading the data from a mzML file as a `Spectra` object
library(MsDataHub)
fl <- MsDataHub::PestMix1_DDA.mzML()
mzml_r <- Spectra(fl)


## -----------------------------------------------------------------------------
#| label: filter MS level
#' R session:

#' Restrict to MS level 2 spectra
mzml_r <- filterMsLevel(mzml_r, 2)
mzml_r <- mzml_r[lengths(mzml_r) >= 3]
mzml_r


## -----------------------------------------------------------------------------
#| label: convert spectra to matchms
#' R session:

#' Convert the R Spectra to a list of Python matchms.Spectrum objects
tmp <- rspec_to_pyspec(mzml_r)


## -----------------------------------------------------------------------------
#' R session:

#' Class of the converted variable
class(tmp)

#' First element
class(tmp[1])


## #' Python session:
## 
## #' Access the Python data structure stored in the R session
## r.tmp

## -----------------------------------------------------------------------------
#| label: assign variable to Python
#' R session:

#' Assign the data to a variable in the Python environment
py_set_attr(py, "mzml_py", rspec_to_pyspec(mzml_r))
names(py)


## #' Python session:
## 
## #' Data type of the variable:
## type(mzml_py)
## 
## #' The length of the list:
## len(mzml_py)
## 
## #' Data type of the first element:
## type(mzml_py[0])
## 
## #' Intensities of the first spectrum:
## mzml_py[0].peaks.intensities

## #' Python session:
## 
## import matchms.filtering as mms_filt
## 
## #' Iterate over the Spectrum list and scale the intensities
## for i in range(len(mzml_py)):
##     mzml_py[i] = mms_filt.normalize_intensities(mzml_py[i])
## 
## #' Intensities for the first spectrum
## mzml_py[0].peaks.intensities

## -----------------------------------------------------------------------------
#| label: inspect changed intensities in R
#' R session:

#' Access the intensities of the first spectrum
py_get_attr(py, "mzml_py")[0]$peaks$intensities


## #' Python session:
## 
## #' Access the `fl` variable from the R session:
## r.fl
## 
## type(r.fl)

## #' Python session:
## 
## #' Access the `Spectra` object with the original data from R; the data
## #' gets directly translated on-the-fly
## r.mzml_r
## 
## #' Access the intensities of the first spectrum
## r.mzml_r[0].peaks.intensities

## -----------------------------------------------------------------------------
#| label: define MGF file
#' R session:

f_mgf <- system.file("extdata", "mgf", "test.mgf", package = "SpectriPy")


## #' Python session
## 
## import matchms
## from matchms.importing import load_from_mgf
## 
## mgf_py = list(load_from_mgf(r.f_mgf))
## mgf_py

## -----------------------------------------------------------------------------
#' R session:

#' Access the first spectrum
py$mgf_py[[1]]


## -----------------------------------------------------------------------------
#' R session:

#' Extract the peaks matrix from the first spectrum
py_to_r(py$mgf_py[[1]]$peaks$to_numpy)


## -----------------------------------------------------------------------------
#' R session:

#' Convert and copy the data to R
mgf_r <- pyspec_to_rspec(py_get_attr(py, "mgf_py"))
mgf_r

#' Extract intensities
mgf_r$intensity


## -----------------------------------------------------------------------------
#| label: initialize MsBackendPy for Python variable
#' R session:

#' Create a Spectra object with a MsBackendPy backend for the
#' attribute "mgf_py"
mgf <- Spectra("mgf_py", source = MsBackendPy())
mgf


## -----------------------------------------------------------------------------
#| label: access MS level and intensity through MsBackendPy
#' R session:

#' Extract MS level
msLevel(mgf)

#' Extract intensity values
intensity(mgf)


## #' Python session:
## 
## #' Scale intensities
## for i in range(len(mgf_py)):
##     mgf_py[i] = mms_filt.normalize_intensities(mgf_py[i])
## 

## -----------------------------------------------------------------------------
#| label: get intensities after modifying in Python
#' R session:

#' Get intensities after scaling the intensities in Python:
intensity(mgf)


## -----------------------------------------------------------------------------
#' R session:

mzml_r


## -----------------------------------------------------------------------------
#' R session:

mzml_r <- setBackend(mzml_r, backend = MsBackendPy(),
                     pythonVariableName = "mzml_p")
mzml_r


## -----------------------------------------------------------------------------
#' R session:

print(object.size(mzml_r), units = "MB")


## -----------------------------------------------------------------------------
#' R session:

mz(mzml_r)


## #' Python session:
## 
## len(mzml_p)
## 
## mzml_p[0].peaks.mz

## -----------------------------------------------------------------------------
#' R session:

spectraVariableMapping("matchms")


## -----------------------------------------------------------------------------
#' R session:

spectraVariableMapping("spectrum_utils")


## -----------------------------------------------------------------------------
#' R session:

spectraVariableMapping(mzml_r)


## -----------------------------------------------------------------------------
#' R session:

library(MsBackendMgf)
sps <- Spectra(system.file("extdata", "mgf", "test.mgf",
                           package = "SpectriPy"), source = MsBackendMgf())
spectraVariables(sps)


## -----------------------------------------------------------------------------
#' R session:

mp <- spectraVariableMapping("matchms", c(SMILES = "smiles", INCHI = "inchi"))
mp


## -----------------------------------------------------------------------------
#' R session:

sps <- setBackend(sps, backend = MsBackendPy(),
                  pythonVariableName = "sps_p",
                  spectraVariableMapping = mp)
spectraVariables(sps)


## -----------------------------------------------------------------------------
#' R session:

sps$SMILES |> head()


## -----------------------------------------------------------------------------
#' R session:

fl <- system.file("extdata", "mgf", "test.mgf", package = "SpectriPy")
s_mgf <- Spectra(fl, source = MsBackendMgf())
s_mgf <- setBackend(s_mgf, MsBackendPy(), pythonVariableName = "mgf_data")
s_mgf


## -----------------------------------------------------------------------------
#' R session:

#' Enable copy-on-replace for MsBackendPy
pyspec_copy_on_replace(TRUE)

#' Make a subset of the original data and assign that to a different Spectra
s_mgf_sub <- s_mgf[1:10]


## -----------------------------------------------------------------------------
#' R session:

#' Get the name of the asscociated Python variable
s_mgf@backend@py_var
s_mgf_sub@backend@py_var

#' The index to the spectrum objects in the Python list
s_mgf@backend@i
s_mgf_sub@backend@i


## -----------------------------------------------------------------------------
#' R session:

s_mgf_sub$rtime <- 1:10 + 0.1


## -----------------------------------------------------------------------------
#' R session:

s_mgf_sub@backend@py_var


## -----------------------------------------------------------------------------
#' R session:

s_mgf@backend@py_var


## -----------------------------------------------------------------------------
#' R session:

pyspec_copy_on_replace(FALSE)


## -----------------------------------------------------------------------------
#' R session:

#' List the *default* spectra variable mapping in R and python, respectively
defaultSpectraVariableMapping()


## #' Python session:
## 
## #' Available metadata for the first spectrum
## mgf_py[0].metadata.keys()

## -----------------------------------------------------------------------------
#' R session:

#' Add mapping for additional spectra variables to the default mapping in R and
#' python, respectively
map <- c(defaultSpectraVariableMapping(), smiles = "smiles",
         inchi = "inchi", name = "compound_name")
map


## -----------------------------------------------------------------------------
#| label: convert R to Python with spectra variables
#' R session:

#' Convert the Python MS data structures to an R `Spectra`
mgf_r <- pyspec_to_rspec(py_get_attr(py, "mgf_py"), mapping = map)
spectraVariables(mgf_r)


## -----------------------------------------------------------------------------
#| label: access spectra variables from Python in R
#' R session:

#' Show the first values for the spectra variable "name"
mgf_r$name |>
    head()

#' Show the first values for the spectra variable "inchi"
mgf_r$inchi |>
    head()


## -----------------------------------------------------------------------------
#' R session:

#' List available spectra variables
spectraVariables(mgf)


## -----------------------------------------------------------------------------
#' R session:

#' Get the first entries from the inchi variable
mgf$inchi |>
    head()


## -----------------------------------------------------------------------------
#| label: add additional mapping
#' R session:

#' Add mapping for additional spectra variables to the `MsBackendPy`
m <- defaultSpectraVariableMapping()
m["precursorIntensity"] <- "pepmassint"
spectraVariableMapping(mgf) <- m

#' List available spectra variables
spectraVariables(mgf)

#' Get spectra data for two variables
spectraData(mgf, c("precursorMz", "precursorIntensity"))


## -----------------------------------------------------------------------------
#| label: compareSpectra with mzML and MGF
#' R session:

#' Create a `Spectra` for the scaled MS data in Python
mzml <- Spectra("mzml_py", source = MsBackendPy())

#' Calculate the pairwise similarity between all spectra
sim <- compareSpectra(mzml, mgf, tolerance = 0.1)
dim(sim)


## #' Python session:
## 
## import matchms.similarity as mms_similarity
## 
## #' Calculate similarity scores
## scores = matchms.calculate_scores(
##     mzml_py, mgf_py, mms_similarity.CosineHungarian(tolerance = 0.1))
## 
## #' Extract similarity scores
## sim = scores.to_array()["CosineHungarian_score"]

## -----------------------------------------------------------------------------
#| label: compare dot product vs Cosine Hungarian
#' R session:

#' Plot the similarity scores against each other
plot(sim, py$sim, pch = 21, col = "#000000ce", bg = "#00000060",
     xlab = "Dot product", ylab = "Cosine Hungarian")
grid()


## -----------------------------------------------------------------------------
#' R session:

sessionInfo()

