Source code for pyprophet.io.scoring.tsv

import pandas as pd
import click
from loguru import logger

from .._base import BaseReader, BaseWriter
from ..._config import RunnerIOConfig
from ...report import save_report


[docs] class TSVReader(BaseReader): """ Class for reading and processing data from OpenSWATH results stored in a tsv format. The TSVReader class provides methods to read different levels of data from tsv feature format. It supports reading data for semi-supervised learning only. Attributes: infile (str): Input file path. outfile (str): Output file path. classifier (str): Classifier used for semi-supervised learning. level (str): Level used in semi-supervised learning (e.g., 'ms1', 'ms2', 'ms1ms2', 'transition', 'alignment'), or context level used peptide/protein/gene inference (e.g., 'global', 'experiment-wide', 'run-specific'). glyco (bool): Flag indicating whether analysis is glycoform-specific. Methods: read(): Read data from the input file based on the alogorithm. """
[docs] def __init__(self, config: RunnerIOConfig): super().__init__(config)
[docs] def read(self) -> pd.DataFrame: infile = self.config.infile table = pd.read_csv(infile, sep="\t") return table
[docs] class TSVWriter(BaseWriter): """ Class for writing OpenSWATH results to a tsv format. Attributes: infile (str): Input file path. outfile (str): Output file path. classifier (str): Classifier used for semi-supervised learning. level (str): Level used in semi-supervised learning (e.g., 'ms1', 'ms2', 'ms1ms2', 'transition', 'alignment'). glyco (bool): Flag indicating whether analysis is glycoform-specific. Methods: save_results(result, pi0): Save the results to the output file based on the module using this class. save_weights(weights): Save the weights to the output file. """
[docs] def __init__(self, config: RunnerIOConfig): super().__init__(config)
[docs] def save_results(self, result, pi0): """ Save the results of the scoring process to specified paths including summary statistics, final statistics, scored tables, and a report. The report includes various statistics such as cutoffs, svalues, qvalues, pvalues, and additional parameters like pi0 and color palette. """ summ_stat_path = self.config.extra_writes.get("summ_stat_path") if summ_stat_path is not None: result.summary_statistics.to_csv(summ_stat_path, sep=",", index=False) logger.success("%s written." % summ_stat_path) full_stat_path = self.config.extra_writes.get("full_stat_path") if full_stat_path is not None: result.final_statistics.to_csv(full_stat_path, sep=",", index=False) logger.success("%s written." % full_stat_path) output_path = self.config.extra_writes.get("output_path") if output_path is not None: result.scored_tables.to_csv(output_path, sep="\t", index=False) logger.success("%s written." % output_path) if result.final_statistics is not None: cutoffs = result.final_statistics["cutoff"].values svalues = result.final_statistics["svalue"].values qvalues = result.final_statistics["qvalue"].values pvalues = result.scored_tables.loc[ (result.scored_tables.peak_group_rank == 1) & (result.scored_tables.decoy == 0) ]["p_value"].values top_targets = result.scored_tables.loc[ (result.scored_tables.peak_group_rank == 1) & (result.scored_tables.decoy == 0) ]["d_score"].values top_decoys = result.scored_tables.loc[ (result.scored_tables.peak_group_rank == 1) & (result.scored_tables.decoy == 1) ]["d_score"].values save_report( self.config.extra_writes.get("report_path"), output_path, top_decoys, top_targets, cutoffs, svalues, qvalues, pvalues, pi0, self.config.runner.color_palette, ) logger.success("%s written." % self.config.extra_writes.get("report_path"))