Source code for sgrna_modeler.mutagenesis

import pandas as pd
import numpy as np
import random

[docs]def generate_variants(seq): # generate a list of all possible variants of a sequence variants = [] variant_nt = [] variant_pos = [] nts = ['A', 'C', 'T', 'G'] for i, seq_nt in enumerate(seq): for N in nts: if seq_nt != N: new_seq = seq[:i] + N + seq[i+1:] variants.append(new_seq) variant_nt.append(N) variant_pos.append(i) variant_df = pd.DataFrame({'variant': variants, 'nt': variant_nt, 'pos': variant_pos}) return variant_df
[docs]def mutagenize_seq(guide, model): variant_df = generate_variants(guide) variant_predictions = model.predict_seqs(variant_df.variant) variant_df['prediction'] = variant_predictions original_prediction = model.predict_seqs([guide]) variant_df['delta'] = variant_df.prediction - original_prediction summarized_df = variant_df.groupby('pos').agg({'delta': 'mean'}).reset_index() summarized_df['nt'] = list(guide) summarized_df['importance'] = np.absolute(summarized_df.delta) summarized_df['pos'] = summarized_df.pos + 1 summarized_df['context'] = guide return summarized_df
[docs]def mutagenize_model(model, resamples): nts = ['A', 'C', 'T', 'G'] seq_len = model.enzyme['context_length'] mutation_list = [] sequence_list = [] sequence = ''.join(random.choice(nts) for i in range(seq_len)) mutation = {'sequence': sequence, 'reference': '', 'nt': '','position': float('nan')} mutation_list.append(mutation) sequence_list.append(sequence) mutation_list = [] for i in range(resamples): while sequence in sequence_list: last_sequence = sequence position = random.randint(0, seq_len - 1) nt = random.choice(nts) sequence = sequence[:position] + nt + sequence[(position + 1):] mutation = {'sequence': sequence, 'reference': last_sequence, 'nt': nt, 'position': position + 1} mutation_list.append(mutation) sequence_list.append(sequence) sequence_df = pd.DataFrame(mutation_list) sequence_predictions = model.predict_seqs(sequence_df.sequence) prediction_df = pd.DataFrame({'sequence': sequence_df.sequence, 'prediction': sequence_predictions}) delta_df = (sequence_df.merge(prediction_df, on = 'sequence') .merge(prediction_df, left_on = 'reference', right_on = 'sequence', suffixes = ('_seq', '_ref'))) delta_df['delta'] = delta_df.prediction_seq - delta_df.prediction_ref return delta_df