#!/bin/bash

# Simple Bash script for searching raw HTML sources.
# Uses common Unix tools: awk, grep, head, tail.

# Also runs under Cygwin (Windows), but make sure the path
# to your archive (arcDir) is set correctly.

# Directory that holds the archived HTML files to be searched.
# --> Adjust this to your own local path.
# Under Cygwin a Windows drive is reached through /cygdrive, for example:
#   arcDir="/cygdrive/D/archive/pol/res"
arcDir='/pathto/archives/8chan/qresearch/.zfs/snapshot/grab-n-snap-20190713-0227/qresearch/res'

# Search term supplied by the user as the first argument.
# An empty term or a single character is rejected; a term of up to
# three characters only triggers a warning and the search continues.
srcTerm="${1}"
if [[ -z "${srcTerm}" ]]; then
  echo "Script searches for expressions in a HTML-archive" >&2
  echo "No search term given -- script will exit. Next time" >&2
  echo "try e.g.: ./searchArchive \"puzzle ever\"" >&2
  exit 1
fi

# ${#srcTerm} yields the length directly; no need for echo | wc | awk.
if (( ${#srcTerm} == 1 )); then
  echo "# Search term is 1 character long -- that's not smart..." >&2
  echo "# Script refuses to search for \"${srcTerm}\"" >&2
  exit 1
elif (( ${#srcTerm} <= 3 )); then
  # NOTE(review): the end of this message was lost in the damaged
  # original; everything after "is" is a reconstruction -- confirm wording.
  echo "# Warning! Length of search term is <= 3 characters" >&2
fi

# Count the HTML files in the archive with a shell glob rather than by
# parsing `ls`: the glob is expanded inside the shell, so it copes with
# unusual file names and is not subject to the argument-size limit that
# an external `ls` hits on very large directories.
html_files=("${arcDir}"/*.html)
if [[ -e "${html_files[0]}" || -L "${html_files[0]}" ]]; then
  nf=${#html_files[@]}
else
  # An unmatched glob stays as the literal pattern, which does not exist.
  nf=0
fi
if (( nf == 0 )); then
  echo "# Error! No HTML-files found in \"${arcDir}\"" >&2
  echo "# Please check if archivePath (\"arcDir=...\") is set correct." >&2
  exit 1
fi
echo "# Searching for \"${srcTerm}\" in \"${arcDir}\" (${nf} files)"

# Temporary file that lists every HTML file of the archive, one path per
# line. mktemp gives a unique name (a fixed "tmp.log" in the current
# directory could overwrite an unrelated file), and the EXIT trap removes
# the file on every exit path.
tmp=$(mktemp) || { echo "# Error! Could not create a temporary file" >&2; exit 1; }
trap 'rm -f -- "${tmp}"' EXIT

# Let the shell expand the glob instead of parsing `ls`; an unmatched
# pattern stays literal and is filtered out by the -e test.
for htmlFile in "${arcDir}"/*.html; do
  if [[ -e "${htmlFile}" ]]; then
    printf '%s\n' "${htmlFile}"
  fi
done > "${tmp}"

#######################################
# Prepare a user-supplied search term for use as a grep pattern.
# Every "." is escaped so it matches a literal dot rather than any
# character. Spaces are deliberately left alone: "\ " is not a defined
# escape in grep's basic regular expressions (newer GNU grep warns about
# it), and an unescaped space already matches literally.
# Arguments: $1 - raw search term
# Outputs:   the escaped term on stdout
#######################################
escape_search_term() {
  local term=$1
  printf '%s\n' "${term//./\\.}"
}

# Pattern handed to grep by the search loop.
srcstr=$(escape_search_term "${srcTerm}")

#######################################
# List every case-insensitive match of a pattern in one file.
# Prints a header with the match count, then for each match a numbered
# entry showing up to 640 bytes of the file starting at the match.
# Prints nothing when the file has no match.
# Arguments: $1 - file to search
#            $2 - grep pattern
# Outputs:   the listing on stdout
#######################################
list_occurrences() {
  local file=$1 pattern=$2
  local -a offsets=()
  local off j=0

  # grep -o -b prints "<byte offset>:<match>" once per match; awk keeps
  # the offset and adds 1 because `tail -c +N` counts bytes from 1.
  # grep runs once per file; count and offsets both come from this list.
  while IFS= read -r off; do
    offsets+=("${off}")
  done < <(grep -iob -- "${pattern}" "${file}" | awk -F':' '{print $1+1}')

  if (( ${#offsets[@]} == 0 )); then
    return 0
  fi

  echo ">${#offsets[@]} occurrences in \"${file}\":"
  for off in "${offsets[@]}"; do
    j=$((j + 1))
    echo -n "   (${j}) "
    tail -c "+${off}" "${file}" | head -c 640
    echo ""
  done
}

# Walk the list of HTML files (one path per line in ${tmp}) and report
# the matches found in each of them.
while IFS= read -r ifile; do
  list_occurrences "${ifile}" "${srcstr}"
done < "${tmp}"

# Clean up
rm -f -- "${tmp}"