import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from bioinfokit import visuz
# 1. Target files
gwas_input_file = "tassel_Sac_gwas_output2.txt"
manhattan_output = "gwas_manhattan_plot"
qq_output = "gwas_qq_plot.png"
print(f"Loading GWAS statistics from: {gwas_input_file}...")
if not os.path.exists(gwas_input_file):
print(f"Error: Could not find file '{gwas_input_file}' in this folder.")
exit()
# Load the file
df = pd.read_csv(gwas_input_file, sep="\t")
# Filter out 'None' intercept AND the 1e-9 TASSEL mathematical ceiling
df_filtered = df[(df["Marker"] != "None") & (df["p"] != 1e-9)].copy()
# Enforce numerical formatting
df_filtered["Chr"] = pd.to_numeric(df_filtered["Chr"])
df_filtered["Pos"] = pd.to_numeric(df_filtered["Pos"])
df_filtered["p"] = pd.to_numeric(df_filtered["p"])
df_filtered = df_filtered.dropna(subset=["Marker", "Chr", "Pos", "p"])
print(f"Processed {len(df_filtered)} true SNPs (excluding 1e-9 artifacts).")
# 3. Draw Manhattan Plot using bioinfokit
print("Generating clean Manhattan Plot...")
visuz.marker.mhat(
df=df_filtered,
chr="Chr",
pv="p",
gwas_sign_line=True,
gwasp=5e-8,
figtype="png",
figname=manhattan_output,
dim=(12, 6),
dotsize=5,
valpha=0.7
)
print(f"Manhattan plot successfully saved: {manhattan_output}.png")
# 4. Generate Q-Q Plot manually (Fixed Sorting Alignment)
print("Generating Q-Q Plot manually...")
n_snps = len(df_filtered)
# Sort observed p-values descending (so -log10 values are ascending: 0 -> max)
p_observed = np.sort(df_filtered["p"].values)[::-1]
log_p_observed = -np.log10(p_observed)
# Compute expected uniform p-values in descending order (so -log10 values are ascending: 0 -> max)
p_expected = np.arange(n_snps, 0, -1) / (n_snps + 1)
log_p_expected = -np.log10(p_expected)
# Render the plot
plt.figure(figsize=(6, 6))
plt.scatter(log_p_expected, log_p_observed, c="crimson", s=5, alpha=0.6, zorder=2)
# Draw the 45-degree diagonal null hypothesis line
max_val = max(max(log_p_expected), max(log_p_observed))
plt.plot([0, max_val], [0, max_val], color="black", linestyle="--", linewidth=1.5, zorder=1)
# Formatting
plt.title("GWAS Quantile-Quantile (Q-Q) Plot", fontsize=12, fontweight="bold")
plt.xlabel(r"Expected $-\log_{10}(P)$", fontsize=10)
plt.ylabel(r"Observed $-\log_{10}(P)$", fontsize=10)
plt.grid(True, which="both", linestyle=":", alpha=0.5)
plt.tight_layout()
# Save the final rendering
plt.savefig(qq_output, dpi=300)
plt.close()
print(f"Q-Q plot successfully saved: {qq_output}")
These are the scripts I used to create Manhattan and Q-Q plots
On Friday, June 12, 2026 at 4:59:23 PM UTC+2 Brandon Monier wrote: