Scripts and additional data generated while developing WGA protocol for Borrelia sequencing
This code generates coverage plots from samtools depth output files. It compares coverage between a standard sequencing protocol and a WGA (Whole Genome Amplification) protocol.
# install seaborn if not already installed
#import os
#os.system("pip install seaborn")
# Libraries
import argparse
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns
import os
import numpy as np
# Insert input file (script for single file) manually. I want to compare
input_file = "/absolutepath/genome1/genome1.depth"
input_WGA = "/absolutepath/genome1/genome2.depth"
# Read file
df = pd.read_csv(input_file, sep="\t", header=None, names=["chrom", "pos", "depth"])
df_2 = pd.read_csv(input_WGA, sep="\t", header=None, names=["chrom", "pos", "depth"])
# Bin every X bp
bin_size = 1000
df_binned = df.groupby(df['pos'] // bin_size).agg({'depth': 'mean'}).reset_index()
df_binned['pos'] = df_binned['pos'] * bin_size # convert back to original scale
# bin WGA
df_binned_2 = df_2.groupby(df_2['pos'] // bin_size).agg({'depth': 'mean'}).reset_index()
df_binned_2['pos'] = df_binned_2['pos'] * bin_size # convert back to original scale
# Create plot
plt.figure(figsize=(15, 5))
sns.lineplot(data=df_binned, x="pos", y="depth", color="#0072B2",linewidth=0.5, label="Standard Protocol")
sns.lineplot(data=df_binned_2, x="pos", y="depth", color="#E69F00",linewidth=0.5, label="WGA Protocol")
#plt.title("Coverage Plot (binned every 1 kb)")
plt.xlabel("Position (kb)")
plt.ylabel("Coverage Depth")
# X-axis in kb
plt.gca().xaxis.set_major_formatter(FuncFormatter(lambda x, _: f"{int(x/1000)}"))
# Optional: log scale
# plt.yscale("log")
plt.ylim(0, 19000)
plt.xlim(0, 1000000)
# Remove background grid
sns.despine()
plt.grid(True)
plt.legend() # <-- add legend
# Save as svg
output_svg = os.path.splitext(input_file)[0] + "_coverage_plot.svg"
plt.savefig(output_svg, format="svg", bbox_inches="tight")
plt.show()