Migration scripts, built initially to connect the Homeserver and the Public VPS

arul 2025-11-15 19:13:19 +00:00
commit f63f01a7fd
10 changed files with 506 additions and 0 deletions

0
README.md Normal file
View File

56
generate-summaries/summarize.py Normal file
View File

@@ -0,0 +1,56 @@
import os
import re
import shutil

def clean_md_file(input_path, output_path):
    with open(input_path, 'r', encoding='utf-8') as f:
        content = f.read()
    # Remove YAML frontmatter (with surrounding ---)
    content = re.sub(r'^---\n(.*?\n)*?---\n', '', content, flags=re.DOTALL)
    # Remove the comments shortcode and everything after it.
    # This must run before the generic shortcode strip below, which would
    # otherwise delete the {{< comments >}} marker and leave the tail behind.
    content = re.sub(r'{{<\s*comments\s*>}}.*', '', content, flags=re.DOTALL)
    # Remove all remaining Hugo shortcodes like {{< ... >}} or {{< /... >}}
    content = re.sub(r'{{<[^>]*>}}', '', content)
    # Remove trailing shell prompt lines (optional)
    content = re.sub(r'^hacker@selfhost1:.*$', '', content, flags=re.MULTILINE)
    # Collapse multiple blank lines to max two
    content = re.sub(r'\n{3,}', '\n\n', content)
    # Strip leading/trailing whitespace
    content = content.strip()
    # Write cleaned content
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)
    print(f"Processed: {input_path} -> {output_path}")

def batch_clean_md_files():
    input_dir = '/var/www/html/professional-site/content/post'
    assets_dir = '/var/www/html/professional-site/assets/summaries'
    static_dir = '/var/www/html/professional-site/static/summaries'
    os.makedirs(assets_dir, exist_ok=True)
    os.makedirs(static_dir, exist_ok=True)
    for filename in os.listdir(input_dir):
        if filename.endswith('.md'):
            input_path = os.path.join(input_dir, filename)
            output_filename = filename.replace('.md', '.txt')
            output_path_assets = os.path.join(assets_dir, output_filename)
            output_path_static = os.path.join(static_dir, output_filename)
            clean_md_file(input_path, output_path_assets)
            # Also copy the cleaned file to the static folder
            shutil.copyfile(output_path_assets, output_path_static)
            print(f"Copied summary to static: {output_path_static}")

batch_clean_md_files()
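
The cleaning above is entirely regex-driven, so its effect is easiest to check on a small sample. A minimal sketch of the same substitutions applied to a made-up post (the sample content is illustrative, not taken from the repo):

import re

sample = """---
title: "Tor Router"
tags: ["Tech"]
---
{{< figure src="/media/tor.png" >}}
Build notes for the router.

{{< comments >}}
comment thread rendered here
"""

content = sample
# Frontmatter off the top
content = re.sub(r'^---\n(.*?\n)*?---\n', '', content, flags=re.DOTALL)
# Comments shortcode and everything after it
content = re.sub(r'{{<\s*comments\s*>}}.*', '', content, flags=re.DOTALL)
# Any remaining shortcodes, then collapse blank lines
content = re.sub(r'{{<[^>]*>}}', '', content)
content = re.sub(r'\n{3,}', '\n\n', content).strip()

print(content)  # -> Build notes for the router.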

12
initiator/check-log Executable file
View File

@@ -0,0 +1,12 @@
#!/bin/bash
LOG_PATH="/var/log/sync-to-pro.log"
if [ -f "$LOG_PATH" ]; then
    echo "📄 Showing contents of $LOG_PATH:"
    cat "$LOG_PATH"
else
    echo "⚠️ Log file not found: $LOG_PATH"
    exit 1
fi

71
initiator/initiate Executable file
View File

@@ -0,0 +1,71 @@
#!/bin/bash
LOG_FILE="/var/log/hugo_sync.log"
SYNC_TO_PRO_LOG="/var/log/sync-to-pro.log"

# Run sync-to-pro first every time, with logging
echo "🔄 Running sync-to-pro to update log... (logged to $SYNC_TO_PRO_LOG)"
/home/arul/auto-scripts/professional-site-scripts/sync-content-pro-site/sync-to-pro >> "$SYNC_TO_PRO_LOG" 2>&1

# Function for the deployment chain
run_deployment_chain() {
    echo "Changing directory to working path..."
    cd /home/arul/auto-scripts/professional-site-scripts || {
        echo "❌ Failed to cd to working directory."
        exit 1
    }
    echo "1. 🚀 Generating summaries..."
    python3 generate-summaries/summarize.py
    echo "2. 📤 Syncing contents from personal site to professional site..."
    sync-content-pro-site/sync-to-pro
    echo "3. 🧐 Checking sync logs..."
    sync-content-pro-site/check-sync-logs
    echo "4. 🗺️ Generating redirect map..."
    python3 redirect-site-upload-map/generate-nginx-map.py
    echo "5. ☁️ Uploading map via SFTP..."
    bash redirect-site-upload-map/upload-map.sftp.sh
    echo "6. 🔄 Rebuilding Hugo professional site to publish the latest contents on the local home server 🏠📰"
    cd /var/www/html/professional-site/ && sudo hugo
    echo "7. 🔃 Restarting reverse proxy tunnel on the remote server"
    cd /home/arul/auto-scripts/professional-site-scripts && bash sync-content-pro-site/tunnel-reverseproxy-restart
    echo "🎉 Deployment chain completed"
}

# Check the log for the latest sync status
if [ ! -f "$LOG_FILE" ]; then
    echo "❌ Log file not found: $LOG_FILE"
    exit 1
fi

LATEST_TIMESTAMP=$(tac "$LOG_FILE" | grep -m 1 -oP '^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}' | head -1)
if [ -z "$LATEST_TIMESTAMP" ]; then
    echo "❌ Could not find any timestamp in the log."
    exit 1
fi

# Print everything from the first line carrying the latest timestamp onward
LOG_BLOCK=$(awk -v ts="$LATEST_TIMESTAMP" '{
    if ($0 ~ ts) start=1
    if (start) print
}' "$LOG_FILE")

if echo "$LOG_BLOCK" | grep -q "✅ Synced files:"; then
    echo "🔁 Changes detected in latest log block. Proceeding with deployment."
    run_deployment_chain
    exit 0
elif echo "$LOG_BLOCK" | grep -q "☑️ Already in sync. No changes needed."; then
    echo "✅ No changes detected in latest log block. Skipping deployment."
    exit 1
else
    echo "⚠️ Could not determine sync status from latest log block."
    exit 1
fi
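
The status check above keys off the most recent timestamped entry in the log, which sync-to-pro writes through logging.Formatter('%(asctime)s [%(levelname)s] %(message)s'). A minimal Python sketch of the same "latest block" logic, using an illustrative excerpt (not real log output):

import re

log = """2025-11-14 09:00:01,120 [INFO] ☑️ Already in sync. No changes needed.
2025-11-15 19:13:19,482 [INFO] Synced: tor-router.md
2025-11-15 19:13:19,483 [INFO] ✅ Synced files: tor-router.md
"""

# Last timestamp in the file, as `tac | grep -m 1 -oP` finds it
latest = re.findall(r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', log, flags=re.MULTILINE)[-1]

# Everything from the first line carrying that timestamp onward, mirroring the awk block
block = log[log.index(latest):]

if "✅ Synced files:" in block:
    print("changes detected -> run deployment chain")
elif "☑️ Already in sync. No changes needed." in block:
    print("no changes -> skip deployment")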

57
redirect-site-upload-map/generate-nginx-map.py Normal file
View File

@@ -0,0 +1,57 @@
import os
import yaml

VALID_TAGS = {"Tech", "Tech tutorials"}
CONTENT_DIR = "/var/www/html/arulbalaji.xyz/content/journal"
OUTPUT_DIR = os.path.join(os.path.dirname(__file__), "redirect-slugs")
OUTPUT_FILE = os.path.join(OUTPUT_DIR, "tech-blogs.map")

def extract_tags_and_slug(filepath):
    print(f"Processing file: {filepath}")
    with open(filepath, "r", encoding="utf-8") as f:
        lines = f.readlines()
    if not lines or lines[0].strip() != "---":
        print(" - No front matter found.")
        return None
    front_matter = []
    for line in lines[1:]:
        if line.strip() == "---":
            break
        front_matter.append(line)
    try:
        data = yaml.safe_load(''.join(front_matter))
        tags = set(map(str.strip, data.get("tags", [])))
        print(f" - Tags found: {tags}")
        if tags & VALID_TAGS:
            slug = data.get("slug", os.path.splitext(os.path.basename(filepath))[0])
            print(f" - Matched tags! Returning slug: /journal/{slug}")
            return f"/journal/{slug}"
        else:
            print(" - No matching tags.")
    except Exception as e:
        print(f"Error parsing {filepath}: {e}")
    return None

def generate_map():
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    entries = []
    for filename in os.listdir(CONTENT_DIR):
        if filename.endswith(".md"):
            path = os.path.join(CONTENT_DIR, filename)
            result = extract_tags_and_slug(path)
            if result:
                entries.append(f"{result} 1;")
    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write("# Auto-generated by Python\n")
        for entry in sorted(entries):
            f.write(entry + "\n")
    print(f"✅ Generated NGINX map at {OUTPUT_FILE} with {len(entries)} entries.")

if __name__ == "__main__":
    generate_map()
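
For reference, the matching above runs against the YAML front matter of each journal post. A small sketch of the shape it expects (title, slug, and tags here are made up):

import yaml

front_matter = """
title: "Building a Tor Router"
slug: tor-router
tags:
  - Tech
"""

data = yaml.safe_load(front_matter)
tags = set(map(str.strip, data.get("tags", [])))
if tags & {"Tech", "Tech tutorials"}:
    # In the real script the slug falls back to the file name if absent
    print(f"/journal/{data['slug']} 1;")  # -> the line written to tech-blogs.map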

18
redirect-site-upload-map/redirect-slugs/tech-blogs.map Normal file
View File

@@ -0,0 +1,18 @@
# Auto-generated by Python
/journal/broken_phone_server 1;
/journal/cyberdeck-build-prototype-2025 1;
/journal/cyberdeck_upgrade-1 1;
/journal/git-tutorial-part1 1;
/journal/git-tutorial-part2 1;
/journal/guacamole-setup-part1 1;
/journal/java_RestApi_Server 1;
/journal/java_RestApi_Server-part2 1;
/journal/java_RestApi_Server-part3 1;
/journal/learning_Webapp_Exploitation 1;
/journal/no_ui_project 1;
/journal/no_ui_youtube_subscribe 1;
/journal/self_hosting_platforms 1;
/journal/selfhosted_mail_server_issues 1;
/journal/selfhosting_on_Premise 1;
/journal/tor-router 1;
/journal/youtube-restriction-bypass 1;

13
redirect-site-upload-map/upload-map.sftp.sh Normal file
View File

@@ -0,0 +1,13 @@
#!/bin/bash
LOCAL_FILE="/home/arul/auto-scripts/professional-site-scripts/redirect-site-upload-map/redirect-slugs/tech-blogs.map"
REMOTE_USER="root"
REMOTE_HOST="74.208.74.61"
REMOTE_FILE="/root/redirect-slugs/tech-blogs.map"
KEY_PATH="/home/arul/.ssh/id_rsa"
sftp -i "$KEY_PATH" ${REMOTE_USER}@${REMOTE_HOST} <<EOF
put ${LOCAL_FILE} ${REMOTE_FILE}
bye
EOF
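
Since the rest of the pipeline is Python, the same single-file upload could also be done without shelling out to sftp. A sketch using paramiko (an alternative, assuming paramiko is installed; not what the repo actually does):

import paramiko

# Same endpoints as upload-map.sftp.sh
LOCAL_FILE = "/home/arul/auto-scripts/professional-site-scripts/redirect-site-upload-map/redirect-slugs/tech-blogs.map"
REMOTE_FILE = "/root/redirect-slugs/tech-blogs.map"

key = paramiko.RSAKey.from_private_key_file("/home/arul/.ssh/id_rsa")
client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # assumption: host key not already trusted
client.connect("74.208.74.61", username="root", pkey=key)
sftp = client.open_sftp()
sftp.put(LOCAL_FILE, REMOTE_FILE)
sftp.close()
client.close()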

27
sync-content-pro-site/check-sync-logs Executable file
View File

@@ -0,0 +1,27 @@
#!/bin/bash
LOG_FILE="/var/log/hugo_sync.log"
echo "🔍 Checking Hugo Sync Log: $LOG_FILE"

# Check if the log file exists
if [[ ! -f "$LOG_FILE" ]]; then
    echo "❌ Log file not found."
    exit 1
fi

echo ""
echo "🗂️ Recent Sync Summary:"
echo "--------------------------"
# Show sync entries among the last 10 log lines
tail -n 10 "$LOG_FILE" | grep 'Synced:' || echo "No recent sync entries."

echo ""
echo "📅 Last Sync Time:"
stat -c "Last modified: %y" "$LOG_FILE"

echo ""
echo "📈 Total sync entries so far:"
grep -c 'Synced:' "$LOG_FILE"

237
sync-content-pro-site/sync-to-pro Executable file
View File

@@ -0,0 +1,237 @@
#!/usr/bin/env python3
import os
import re
import shutil
import logging
from pathlib import Path
import pwd
import grp
import filecmp

# ==== CONFIG ====
ARUL_SPACE_CONTENT = "/var/www/html/arulbalaji.xyz/content/journal"
PRO_CONTENT = "/var/www/html/professional-site/content/post"
ARUL_SPACE_MEDIA = "/var/www/html/arulbalaji.xyz/static/media"
PRO_MEDIA = "/var/www/html/professional-site/static/media"
SUMMARY_FOLDER = "/var/www/html/professional-site/assets/summaries"
LOG_FILE = "/var/log/hugo_sync.log"
VALID_TAGS = {"Tech", "Tech tutorials"}
OWNER_USER = "arul"  # Set to None to skip chown

# ==== LOGGER SETUP ====
logger = logging.getLogger("SyncLogger")
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s')
file_handler = logging.FileHandler(LOG_FILE)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

# ==== HELPERS ====
def parse_frontmatter(lines):
    front = {}
    in_frontmatter = False
    body_start = 0
    for i, line in enumerate(lines):
        if line.strip() == "---":
            if not in_frontmatter:
                in_frontmatter = True
            else:
                body_start = i + 1
                break
        elif in_frontmatter and ':' in line:
            key, value = line.split(':', 1)
            front[key.strip()] = value.strip().strip('"')
    return front, body_start

def extract_meta_comments(lines):
    description = None
    featured = None
    for line in lines:
        if "#pro-site description:" in line:
            description = line.split(":", 1)[1].strip()
        if "#pro-site featured_image:" in line:
            featured = line.split(":", 1)[1].strip()
    return description, featured

def has_valid_tags(front):
    tags_line = front.get("tags", "")
    tags = re.findall(r'"(.*?)"', tags_line)
    return any(tag in VALID_TAGS for tag in tags)

def chown_to_user(file_path, username):
    if not username:
        return
    try:
        uid = pwd.getpwnam(username).pw_uid
        gid = grp.getgrnam(username).gr_gid
        os.chown(file_path, uid, gid)
    except Exception as e:
        logger.warning(f"Could not chown {file_path}: {e}")

def strip_shortcodes(content):
    return re.sub(r"{{<[^>]+>}}", "", content)

def files_are_identical(file1, file2):
    try:
        if os.path.getsize(file1) != os.path.getsize(file2):
            return False
        return filecmp.cmp(file1, file2, shallow=False)
    except Exception as e:
        logger.warning(f"Error comparing files {file1} and {file2}: {e}")
        return False

# ==== TRANSFORMATION ====
def transform_file(file_path):
    try:
        with open(file_path, "r") as f:
            lines = f.readlines()
    except PermissionError:
        logger.error(f"Permission denied: {file_path}")
        return None, None
    # Skip files inside hidden directories
    if any(part.startswith('.') for part in file_path.parts):
        return None, None
    front, body_start = parse_frontmatter(lines)
    if not has_valid_tags(front):
        return None, None
    description, featured_image = extract_meta_comments(lines)
    title = front.get("title", "Untitled")
    raw_date = front.get("date", "1970-01-01")
    formatted_date = f"{raw_date}T00:00:00+05:30"
    body = "".join(lines[body_start:]).strip()
    new_frontmatter = f"""---
date: {formatted_date}
description: "{description or ''}"
featured_image: "{featured_image or ''}"
title: "{title}"
---
"""
    full_markdown = new_frontmatter + "\n\n" + body
    plain_text_summary = strip_shortcodes(body).strip()
    return full_markdown, plain_text_summary

# ==== MEDIA FOLDER COMPARISON ====
def are_folders_identical(folder1, folder2):
    def get_all_files(base_folder):
        file_set = set()
        for root, _, files in os.walk(base_folder):
            for f in files:
                full_path = os.path.join(root, f)
                rel_path = os.path.relpath(full_path, base_folder)
                file_set.add(rel_path)
        return file_set
    if not os.path.exists(folder1) or not os.path.exists(folder2):
        return False
    files1 = get_all_files(folder1)
    files2 = get_all_files(folder2)
    if files1 != files2:
        return False
    for rel_file in files1:
        file1 = os.path.join(folder1, rel_file)
        file2 = os.path.join(folder2, rel_file)
        if not files_are_identical(file1, file2):
            return False
    return True

# ==== MAIN SYNC CHECK ====
def check_already_synced():
    for md_file in Path(ARUL_SPACE_CONTENT).rglob("*.md"):
        transformed, _ = transform_file(md_file)
        if not transformed:
            continue
        pro_file_path = Path(PRO_CONTENT) / md_file.name
        if not pro_file_path.exists():
            return False
        try:
            with open(pro_file_path, "r") as f:
                pro_content = f.read()
            if pro_content != transformed:
                return False
        except Exception as e:
            logger.warning(f"Could not read {pro_file_path}: {e}")
            return False
    return True

# ==== MEDIA SYNC ====
def copy_media_folder():
    if not os.path.exists(ARUL_SPACE_MEDIA):
        logger.warning(f"Media folder not found in Arul's space at {ARUL_SPACE_MEDIA}")
        return
    try:
        if os.path.exists(PRO_MEDIA):
            shutil.rmtree(PRO_MEDIA)
        shutil.copytree(ARUL_SPACE_MEDIA, PRO_MEDIA, dirs_exist_ok=True)
        if OWNER_USER:
            for root, dirs, files in os.walk(PRO_MEDIA):
                for name in files:
                    chown_to_user(os.path.join(root, name), OWNER_USER)
        logger.info("📁 Media folder synced from Arul's space → professional site")
    except Exception as e:
        logger.error(f"Failed to sync media folder from Arul's space: {e}")

# ==== MAIN SYNC ====
def sync_markdowns():
    synced = []
    Path(PRO_CONTENT).mkdir(parents=True, exist_ok=True)
    Path(SUMMARY_FOLDER).mkdir(parents=True, exist_ok=True)
    for md_file in Path(ARUL_SPACE_CONTENT).rglob("*.md"):
        try:
            transformed, summary = transform_file(md_file)
            if transformed:
                out_path = Path(PRO_CONTENT) / md_file.name
                with open(out_path, "w") as out_file:
                    out_file.write(transformed)
                if OWNER_USER:
                    chown_to_user(out_path, OWNER_USER)
                synced.append(str(md_file.name))
                summary_path = Path(SUMMARY_FOLDER) / (md_file.stem + ".txt")
                with open(summary_path, "w") as sum_file:
                    sum_file.write(summary)
                if OWNER_USER:
                    chown_to_user(summary_path, OWNER_USER)
        except Exception as e:
            logger.error(f"Error processing {md_file.name}: {e}")
    return synced

# ==== EXECUTION ====
if __name__ == "__main__":
    if check_already_synced() and are_folders_identical(ARUL_SPACE_MEDIA, PRO_MEDIA):
        logger.info("☑️ Already in sync. No changes needed.")
    else:
        synced_files = sync_markdowns()
        copy_media_folder()
        if synced_files:
            for f in synced_files:
                logger.info(f"Synced: {f}")
            logger.info(f"✅ Synced files: {', '.join(synced_files)}")
        else:
            logger.info("☑️ No new valid tech/tutorial markdowns to sync.")
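
The description and featured image that transform_file injects into the new front matter come from the "#pro-site" marker lines that extract_meta_comments() scans for. A sketch of a source post carrying those markers (contents and exact placement are a guess; the function only requires the substrings to appear somewhere in the file):

lines = """---
title: "Tor Router"
date: 2025-10-01
tags: ["Tech"]
---
#pro-site description: Routing a whole LAN through Tor
#pro-site featured_image: /media/tor.png
Body text here.
""".splitlines(keepends=True)

description = featured = None
for line in lines:
    if "#pro-site description:" in line:
        description = line.split(":", 1)[1].strip()
    if "#pro-site featured_image:" in line:
        featured = line.split(":", 1)[1].strip()

print(description)  # -> Routing a whole LAN through Tor
print(featured)     # -> /media/tor.png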

15
sync-content-pro-site/tunnel-reverseproxy-restart Normal file
View File

@@ -0,0 +1,15 @@
#!/bin/bash
ssh -i /home/arul/.ssh/id_rsa root@74.208.74.61 <<EOF
if sudo /usr/local/nginx/sbin/nginx -t; then
    echo "✅ Nginx config test passed, reloading..."
    if sudo /usr/local/nginx/sbin/nginx -s reload; then
        echo "🚀 Nginx reloaded successfully!"
    else
        echo "❌ Nginx reload failed!"
    fi
else
    echo "❌ Nginx config test failed!"
fi
EOF