Computing Tools

Personal reference of scripts, commands, and automation tools.

LaTeX

Letter template

LaTeX: letter.tex

% Minimal formal letter built on the standard `letter` document class.
\documentclass{letter}
% \signature is the name printed under the closing; \address is the sender's address.
\signature{ALE, Biju}
\address{Kathmandu}
\begin{document}
% The argument of the letter environment is the recipient block; \\ separates its lines.
\begin{letter}{To the HR \\ Some Institution \\ Some Address}
\opening{Dear ...,}
Body text goes here.
\closing{Yours Faithfully,}
\end{letter}
\end{document}

Wrap text around a figure

LaTeX: wrapfigure.tex

% Needs \usepackage{wrapfig} (wrapfigure) and \usepackage{graphicx} (\includegraphics) in the preamble.
% {r} puts the figure on the right; 0.4\linewidth is the width reserved for it.
\begin{wrapfigure}{r}{0.4\linewidth}
  \centering
  \includegraphics[width=\linewidth, keepaspectratio]{images/filename}
  \caption{Caption text}
  % NOTE(review): the negative space presumably trims the gap left under the caption; tune per figure.
  \vspace{-30pt}
\end{wrapfigure}

Word count with texcount

Generate: texcount.pl -1 -sum document.tex -out=document.sum
In preamble: \newcommand\wordcount{\input{\jobname.sum}} then call \wordcount where needed
Detailed summary: \usepackage{verbatim} then \newcommand\wordcount{\verbatiminput{\jobname.sum}}
Multi-file: add -inc flag to texcount.pl

PDF

Scanned PDF → B&W (Otsu threshold)

Python: pdf_bw_otsu.py

import os
from pdf2image import convert_from_path
from PIL import Image, ImageEnhance
import numpy as np
import cv2

# Binarise every page of a scanned PDF with an Otsu threshold, save each page
# as a PNG and re-assemble the pages into output_images/bw_output.pdf.
pdf_filename = "a.pdf"
if os.path.isfile(pdf_filename):
    try:
        output_dir = "output_images"
        os.makedirs(output_dir, exist_ok=True)
        print("Converting PDF pages to images...")
        pages = convert_from_path(pdf_filename, dpi=300, thread_count=4)
        bw_images = []
        for page_no, page in enumerate(pages, start=1):
            print(f"Processing page {page_no}...")
            # Greyscale first, then double the contrast before thresholding.
            boosted = ImageEnhance.Contrast(page.convert("L")).enhance(2)
            # With THRESH_OTSU the cut-off is computed from the image itself,
            # so the 0 passed as the threshold argument is only a placeholder.
            _, mask = cv2.threshold(
                np.array(boosted), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
            )
            bw_page = Image.fromarray(mask)
            bw_page.save(os.path.join(output_dir, f"bw_page_{page_no}.png"), "PNG")
            bw_images.append(bw_page)
        out_pdf = os.path.join(output_dir, "bw_output.pdf")
        print(f"Saving to {out_pdf}...")
        # The first page drives the save; the remaining pages are appended to it.
        bw_images[0].save(out_pdf, save_all=True, append_images=bw_images[1:])
        print("Done!")
    except Exception as e:
        print(f"Error: {e}")
else:
    print(f"{pdf_filename} not found in the current directory.")

Scanned PDF → B&W (simple threshold)

Run ocrmypdf on the output afterwards to restore the text layer.

Python: pdf_bw_simple.py

import os
from pdf2image import convert_from_path
from PIL import Image


def _black_or_white(p):
    """Map one greyscale value to white (255) above the fixed cut-off of 128, else black (0)."""
    return 255 if p > 128 else 0


# Binarise every page of a scanned PDF with a fixed threshold, save each page
# as a PNG and re-assemble the pages into output_images/bw_output.pdf.
pdf_filename = "a.pdf"
if os.path.isfile(pdf_filename):
    try:
        output_dir = "output_images"
        os.makedirs(output_dir, exist_ok=True)
        print("Converting PDF pages to images...")
        pages = convert_from_path(pdf_filename, dpi=300, thread_count=4)
        bw_images = []
        for page_no, page in enumerate(pages, start=1):
            print(f"Processing page {page_no}...")
            # mode='1' makes the result a 1-bit image.
            bw_page = page.convert("L").point(_black_or_white, mode='1')
            bw_page.save(os.path.join(output_dir, f"bw_page_{page_no}.png"), "PNG")
            bw_images.append(bw_page)
        out_pdf = os.path.join(output_dir, "bw_output.pdf")
        # The first page drives the save; the remaining pages are appended to it.
        bw_images[0].save(out_pdf, save_all=True, append_images=bw_images[1:])
        print("Done!")
    except Exception as e:
        print(f"Error: {e}")
else:
    print(f"{pdf_filename} not found.")

Resize all PDFs to A4

Bash: pdf_resize_a4.sh

# Resize every PDF in the current directory to A4, overwriting each file in place.
for pdf in *.pdf; do
    # With no PDFs present the glob stays as the literal "*.pdf"; skip it
    # instead of handing a non-existent file to pdfjam.
    [ -e "$pdf" ] || continue
    pdfjam --outfile "$pdf" --paper a4paper "$pdf"
done

Trim first page from all PDFs

Bash: pdf_trim_first.sh

# Write a copy of every PDF in the current directory, minus its first page,
# into ./trimmed (the originals are left untouched).
mkdir -p trimmed
for i in *.pdf; do
    # With no PDFs present the glob stays as the literal "*.pdf"; skip it.
    [ -e "$i" ] || continue
    # NOTE(review): a one-page PDF has no page 2, so pdftk is expected to
    # report an error for it and the loop moves on to the next file; confirm.
    pdftk "$i" cat 2-end output "trimmed/$i"
done

Batch DjVu → PDF

Bash: djvu_to_pdf.sh

# Convert every .djvu file in the current directory to a PDF of the same name.
for i in *.djvu; do
    # With no .djvu files present the glob stays literal; skip it.
    [ -e "$i" ] || continue
    # ${i%.djvu} strips the extension before .pdf is appended.
    djvu2pdf "$i" "${i%.djvu}.pdf"
done

Convert JPGs to PDF (per file)

# One PDF per image; -auto-orient applies the EXIF orientation first. The guard skips the literal "*.jpeg" left by an unmatched glob.
for i in *.jpeg; do [ -e "$i" ] || continue; convert "$i" -auto-orient "${i%.jpeg}.pdf"; done

Combine all images into one PDF

# One page per image. Pages follow the shell's glob order, so zero-pad numbered files (01.jpg, 02.jpg, ...) to keep them in sequence.
convert *.jpg -auto-orient pictures.pdf

Search text inside PDFs

# -C 3: three lines of context around each hit; -H: print the file name; -i: ignore case;
# -R: recurse into any directories given (following symlinks); -e: the pattern to search for.
pdfgrep -C 3 -HiR -e "search term" *.pdf

Extract images from a PDF

# -all keeps each image in its embedded format where possible; output files are named extracted-images/image-NNN.<ext>.
# mkdir -p succeeds when the folder already exists, so a second run still reaches pdfimages.
mkdir -p extracted-images && pdfimages -all <path-to-pdf> extracted-images/image

llpp clipboard config (Linux)

Config: ~/.config/llpp.conf

selection-command='LC_CTYPE=UTF-8 xclip -i -selection clipboard'
paste-command='LC_CTYPE=UTF-8 xclip -o -selection clipboard'

Image

Copyright watermark

Auto-rotate EXIF orientation, then stamp a copyright notice on every image. Output files are prefixed f_.

Bash: copyright_watermark.sh

# Step 1 – fix EXIF rotation in-place
for img in *; do
    # `*` also matches sub-directories; only regular files are images.
    [ -f "$img" ] || continue
    jhead -autorot "$img"
done
# Step 2 – stamp the notice in the bottom-left corner and write f_<name>.
# The text is drawn twice: first with a dark stroke as an outline, then in
# white on a semi-transparent black background.
# NOTE: running this again in the same directory also picks up the f_* outputs
# of the previous run, so move them away first.
for img in *; do
    [ -f "$img" ] || continue
    convert "${img}" \
        -gravity SouthWest \
        -font TrajanPro-Regular \
        -pointsize 50 \
        -stroke '#000C' -strokewidth 2 -annotate 0 '© Biju Ale' \
        -stroke  none  -undercolor '#00000080' \
        -fill white    -annotate 0 '© Biju Ale' \
        "f_${img}"
done

Remove white background (trim)

# -trim crops away border pixels that match the corner colour; +repage then resets the canvas offset left behind by the crop.
magick img1.jpeg -trim +repage img1_trim.jpeg

Vertical stitch (top → bottom)

# -append stacks the inputs top to bottom in the order listed; more input files can be added before it.
magick img1.jpeg img2.jpeg -append final.jpeg

Horizontal stitch (side by side)

# +append places the inputs left to right in the order listed; more input files can be added before it.
magick img1.jpeg img2.jpeg +append final.jpeg

Audio

MKV → MP3 (recursive)

# For each .mkv below the current directory, write an .mp3 next to it.
#   -vn drops the video stream, -b:a 320000 sets a 320 kbit/s audio bitrate,
#   -y overwrites an existing .mp3 without asking.
# `_` fills $0 of the inline script so that the path supplied by find arrives as $1.
find . -name '*.mkv' -exec bash -c 'ffmpeg -i "$1" -vn -b:a 320000 -y "${1%.mkv}.mp3"' _ {} \;

FLAC → M4A (Apple Lossless)

Usage: ./flac_to_m4a.sh flac m4a ./src ./dest

Bash: flac_to_m4a.sh

#!/usr/bin/env bash
# Convert every <srcExt> file in <srcDir> to Apple Lossless (ALAC) and write
# the result as <destDir>/<name>.<destExt>.
# Usage: ./flac_to_m4a.sh flac m4a ./src ./dest
# Exit status: 0 when every file converted, 1 on bad usage, when no input
# files were found, or when at least one conversion failed.
if [ "$#" -ne 4 ]; then
    echo "Usage: $0 <srcExt> <destExt> <srcDir> <destDir>" >&2
    exit 1
fi
srcExt=$1
destExt=$2
srcDir=$3
destDir=$4
# ffmpeg does not create missing output directories.
mkdir -p "$destDir" || exit 1
converted=0
failed=0
for filename in "$srcDir"/*."$srcExt"; do
    # An unmatched glob stays literal; skip it rather than feed it to ffmpeg.
    [ -e "$filename" ] || continue
    baseName=$(basename "${filename%.*}")
    # -acodec alac re-encodes the audio losslessly; -c:v copy carries any
    # video stream (such as embedded cover art) over unchanged.
    if ffmpeg -i "$filename" -acodec alac -c:v copy \
           "$destDir/$baseName.$destExt"; then
        converted=$((converted + 1))
    else
        failed=$((failed + 1))
    fi
done
if [ "$converted" -eq 0 ] && [ "$failed" -eq 0 ]; then
    echo "No .${srcExt} files found in ${srcDir}" >&2
    exit 1
fi
if [ "$failed" -gt 0 ]; then
    echo "${failed} file(s) could not be converted" >&2
    exit 1
fi
echo "Conversion from ${srcExt} to ${destExt} complete!"

FLAC → Video (static cover art)

Muxes audio with a still image into MP4. Usage: ./flac_to_vid.sh flac mp4 ./src ./dest

Bash: flac_to_vid.sh

#!/usr/bin/env bash
# Mux every <srcExt> audio file in <srcDir> with a still cover image into
# <destDir>/<name>.<destExt>.
# Usage: ./flac_to_vid.sh flac mp4 ./src ./dest
# Exit status: 0 when every file converted, 1 on bad usage, a missing cover
# image, no input files, or at least one failed conversion.
if [ "$#" -ne 4 ]; then
    echo "Usage: $0 <srcExt> <destExt> <srcDir> <destDir>" >&2
    exit 1
fi
srcExt=$1
destExt=$2
srcDir=$3
destDir=$4
cover="cover.jpg"   # place cover art in working directory
if [ ! -f "$cover" ]; then
    echo "Cover image not found: $cover" >&2
    exit 1
fi
# ffmpeg does not create missing output directories.
mkdir -p "$destDir" || exit 1
converted=0
failed=0
for filename in "$srcDir"/*."$srcExt"; do
    # An unmatched glob stays literal; skip it rather than feed it to ffmpeg.
    [ -e "$filename" ] || continue
    baseName=$(basename "${filename%.*}")
    # -loop 1 repeats the still image as the video track and -shortest ends
    # the output when the audio ends. -c:a copy passes the audio through
    # without re-encoding.
    # NOTE(review): confirm that the chosen container and target player accept
    # the copied audio codec (e.g. FLAC inside MP4).
    if ffmpeg -loop 1 -framerate 2 -i "$cover" \
           -i "$filename" \
           -c:v libx264 -preset medium -tune stillimage -crf 18 \
           -c:a copy -shortest -pix_fmt yuv420p \
           "$destDir/$baseName.$destExt"; then
        converted=$((converted + 1))
    else
        failed=$((failed + 1))
    fi
done
if [ "$converted" -eq 0 ] && [ "$failed" -eq 0 ]; then
    echo "No .${srcExt} files found in ${srcDir}" >&2
    exit 1
fi
if [ "$failed" -gt 0 ]; then
    echo "${failed} file(s) could not be converted" >&2
    exit 1
fi
echo "Conversion from ${srcExt} to ${destExt} complete!"

Split single FLAC with cue sheet

Install tools: sudo pacman -Syu cuetools shntool
Navigate to the directory with the .flac and .cue files
Split: shntool split -f *.cue -o flac *.flac
Tag: cuetag.sh *.cue split-track*.flac

Files

Folder binning by keyword

Moves all files whose name contains a given string into a subfolder of that name.

Python: folder_bin.py

import os, shutil, sys


def organize(search_string, base=None):
    """Move every file whose name contains ``search_string`` into ``<base>/<search_string>/``.

    The tree below ``base`` is searched recursively. Files already inside the
    destination folder are left alone, and a file is skipped (with a message)
    when a file of the same name is already in the destination.

    Args:
        search_string: Substring to look for in file names; also the name of
            the destination sub-folder. Must not be empty.
        base: Directory to search. Defaults to the current working directory.

    Raises:
        ValueError: If ``search_string`` is empty.
    """
    if not search_string:
        raise ValueError("search_string must not be empty")
    base = os.path.abspath(os.getcwd() if base is None else base)
    dest = os.path.join(base, search_string)
    for root, dirnames, filenames in os.walk(base):
        # Prune the destination so files that already live there (or were just
        # moved there) are never visited again.
        dirnames[:] = [d for d in dirnames if os.path.join(root, d) != dest]
        for filename in filenames:
            if search_string not in filename:
                continue
            target = os.path.join(dest, filename)
            if os.path.exists(target):
                print(f"Skipping (already in destination): {filename}")
                continue
            os.makedirs(dest, exist_ok=True)
            print(f"Moving: {filename}")
            # The source path is built from the directory being walked, so
            # matches found in sub-folders are moved correctly as well.
            shutil.move(os.path.join(root, filename), target)


if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.exit(f"Usage: {sys.argv[0]} <search_string>")
    organize(sys.argv[1])

rsync — basic recursive

# -r recurses without preserving metadata. The trailing slash on <source>/ copies its contents rather than the directory itself.
rsync -r <source>/ <destination>/

rsync — archive (preserves symlinks, permissions, timestamps, ownership)

# -a is shorthand for -rlptgoD: recursive, plus symlinks, permissions, times, group, owner and device/special files.
rsync -a <source>/ <destination>/

rsync — verbose dry-run (preview changes)

# --dry-run only lists what would be transferred; nothing is written. Drop it to perform the copy.
rsync -av --dry-run <source>/ <destination>/

rsync — mirror (deletes destination-only files)

# As written this is still a preview: --dry-run only reports what --delete would remove.
# Check the output, then run again without --dry-run to actually mirror.
rsync -av --delete --dry-run <source>/ <destination>/

rsync — backup to remote (compressed)

# -z compresses in transit; -P is --partial --progress (resumable transfers with a progress display).
# <source> has no trailing slash here, so the directory itself is created under <destination>/.
rsync -zaP <source> user@ip_address:<destination>/

Recursive find and copy by extension

# Copies every *.xsl found below ./ into one flat target directory.
# cp -p preserves mode, ownership and timestamps; -v prints each copy; ';' runs one cp per match.
find ./ -name '*.xsl' -exec cp -prv '{}' '/path/to/targetDir/' ';'

Recursive copy matching files (multiple dirs)

# '{} +' batches many matches into each cp call; -t (GNU cp) names the target directory first so the file list can come last.
find '/source-path' -name '*.JPG' -exec cp -t '/destination-path' {} +

Delete all files of an extension (Windows)

REM Walks the current directory tree; each del command is echoed into a child cmd process, which runs it.
REM %f is the interactive-prompt form; inside a .bat/.cmd file write %%f instead.
for /R %f in (*.flac) do echo del "%f" | cmd

Web

Internet Archive — get collection item list

# --itemlist prints only the item identifiers, one per line; the redirect stores them in items.txt.
ia search 'collection:mir_titles' --itemlist > items.txt

Internet Archive — download all PDFs (10 parallel)

Bash: ia_download.sh

# Download the PDFs of every item listed in items.txt, ten items at a time.
# -I {} hands each input line to `ia download` as one argument, so the identifier is never re-parsed by a shell
# and no `bash -c` wrapper is needed. (-I already implies one line per command; it cannot be combined with -n.)
xargs -P 10 -I {} ia download --no-directories --glob "*.pdf" --destdir=. {} < items.txt

Selenium starter

Python: selenium_starter.py

from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import ui

# Browser settings go through an Options object: current Selenium 4 releases no
# longer accept the `capabilities=` keyword on webdriver.Firefox.
# There is no "marionette" flag to set any more; geckodriver is always used.
options = Options()
driver = webdriver.Firefox(options=options)
driver.get("https://www.google.com")

wget — recursive PDF download

Bash: wget_recursive.sh

# -r recurse, -nd save everything flat in the current directory, -nH no host-name directory,
# -np never ascend above the start URL, -A keep only files matching *.pdf.
# The Accept header and browser user agent are sent with every request.
wget --header="Accept: text/html" \
     --user-agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0" \
     -r -nd -nH -np -A "*.pdf" \
     "https://example.com/files/"

wget — fix filenames from Content-Disposition

# -c resumes partial downloads; -i reads the URLs from urls.txt;
# --content-disposition names each file from the server's Content-Disposition header;
# --restrict-file-names=windows escapes characters that are not valid in Windows file names.
wget -c -i urls.txt --restrict-file-names=windows --content-disposition

wget — download archived MIT courseware

Bash: wget_mit_ocw.sh

# Throttled recursive download starting from one course index page:
#   -c resume, -r recurse, -np stay below the start directory,
#   -k rewrite links for local browsing, -E add .html where needed, -p fetch page requisites,
#   -A pdf,html,htm keep only those file types,
#   --wait=2 --random-wait --limit-rate=100k space out requests and cap bandwidth.
wget -c -r -np -k -E -p \
     -A pdf,html,htm \
     --wait=2 --random-wait --limit-rate=100k \
     --user-agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36" \
     "https://dspace.mit.edu/bitstream/handle/1721.1/98368/24-221-fall-2005/contents/index.htm"

Mac

Remove stubborn login items

Delete the relevant entries for the offending app from these directories:

Paths: login-item-dirs

/Library/Application Support/Microsoft/
/Library/LaunchAgents/
/Library/LaunchDaemons/
/Library/PrivilegedHelperTools/

Automator — add files to Calibre

Create a Quick Action in Automator (accepts files in Finder), add a Run Shell Script step:

Bash: add_to_calibre.sh

# Hand each file passed as an argument to Calibre, then post one notification
# with the number of files passed.
count=$#
# `for f` with no list iterates over the positional parameters.
for f; do
    open -a Calibre "$f"
done
osascript -e "display notification \"Added ${count} file(s) to Calibre\" with title \"Calibre\""