Source code for pyprophet.io.scoring.tsv

import pandas as pd
import click
from loguru import logger

from .._base import BaseReader, BaseWriter
from ..._config import RunnerIOConfig
from ...report import save_report



[docs]
class TSVReader(BaseReader):
    """
    Class for reading and processing data from OpenSWATH results stored in a tsv format.

    The TSVReader class provides methods to read different levels of data from tsv feature format.
    It supports reading data for semi-supervised learning only.

    Attributes:
        infile (str): Input file path.
        outfile (str): Output file path.
        classifier (str): Classifier used for semi-supervised learning.
        level (str): Level used in semi-supervised learning (e.g., 'ms1', 'ms2', 'ms1ms2', 'transition', 'alignment'), or context level used peptide/protein/gene inference (e.g., 'global', 'experiment-wide', 'run-specific').
        glyco (bool): Flag indicating whether analysis is glycoform-specific.

    Methods:
        read(): Read data from the input file based on the alogorithm.
    """


[docs]
    def __init__(self, config: RunnerIOConfig):
        super().__init__(config)



[docs]
    def read(self) -> pd.DataFrame:
        infile = self.config.infile
        table = pd.read_csv(infile, sep="\t")
        return table





[docs]
class TSVWriter(BaseWriter):
    """
    Class for writing OpenSWATH results to a tsv format.

    Attributes:
        infile (str): Input file path.
        outfile (str): Output file path.
        classifier (str): Classifier used for semi-supervised learning.
        level (str): Level used in semi-supervised learning (e.g., 'ms1', 'ms2', 'ms1ms2', 'transition', 'alignment').
        glyco (bool): Flag indicating whether analysis is glycoform-specific.

    Methods:
        save_results(result, pi0): Save the results to the output file based on the module using this class.
        save_weights(weights): Save the weights to the output file.
    """


[docs]
    def __init__(self, config: RunnerIOConfig):
        super().__init__(config)



[docs]
    def save_results(self, result, pi0):
        """
        Save the results of the scoring process to specified paths including summary statistics, final statistics, scored tables, and a report. The report includes various statistics such as cutoffs, svalues, qvalues, pvalues, and additional parameters like pi0 and color palette.
        """
        summ_stat_path = self.config.extra_writes.get("summ_stat_path")
        if summ_stat_path is not None:
            result.summary_statistics.to_csv(summ_stat_path, sep=",", index=False)
            logger.success("%s written." % summ_stat_path)

        full_stat_path = self.config.extra_writes.get("full_stat_path")
        if full_stat_path is not None:
            result.final_statistics.to_csv(full_stat_path, sep=",", index=False)
            logger.success("%s written." % full_stat_path)

        output_path = self.config.extra_writes.get("output_path")
        if output_path is not None:
            result.scored_tables.to_csv(output_path, sep="\t", index=False)
            logger.success("%s written." % output_path)

        if result.final_statistics is not None:
            cutoffs = result.final_statistics["cutoff"].values
            svalues = result.final_statistics["svalue"].values
            qvalues = result.final_statistics["qvalue"].values

            pvalues = result.scored_tables.loc[
                (result.scored_tables.peak_group_rank == 1)
                & (result.scored_tables.decoy == 0)
            ]["p_value"].values
            top_targets = result.scored_tables.loc[
                (result.scored_tables.peak_group_rank == 1)
                & (result.scored_tables.decoy == 0)
            ]["d_score"].values
            top_decoys = result.scored_tables.loc[
                (result.scored_tables.peak_group_rank == 1)
                & (result.scored_tables.decoy == 1)
            ]["d_score"].values

            save_report(
                self.config.extra_writes.get("report_path"),
                output_path,
                top_decoys,
                top_targets,
                cutoffs,
                svalues,
                qvalues,
                pvalues,
                pi0,
                self.config.runner.color_palette,
            )
            logger.success("%s written." % self.config.extra_writes.get("report_path"))