Migration scripts, built initially to connect the Homeserver to the public VPS
commit f63f01a7fd
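
The entry point for the chain below is initiator/initiate. A minimal sketch of how it might be scheduled on the Homeserver (the cron cadence and cron log path are assumptions, not part of this commit):

# Hypothetical crontab entry on the Homeserver; schedule and log path are assumed
*/30 * * * * /home/arul/auto-scripts/professional-site-scripts/initiator/initiate >> /var/log/initiate-cron.log 2>&1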
56  generate-summaries/summarize.py  Normal file
@@ -0,0 +1,56 @@
import os
import re
import shutil


def clean_md_file(input_path, output_path):
    with open(input_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Remove YAML frontmatter (with surrounding ---)
    content = re.sub(r'^---\n(.*?\n)*?---\n', '', content, flags=re.DOTALL)

    # Remove the comments shortcode and everything after it
    # (must run before generic shortcode stripping, which would consume the marker)
    content = re.sub(r'{{<\s*comments\s*>}}.*', '', content, flags=re.DOTALL)

    # Remove all remaining Hugo shortcodes like {{< ... >}} or {{< /... >}}
    content = re.sub(r'{{<[^>]*>}}', '', content)

    # Remove trailing shell prompt lines (optional)
    content = re.sub(r'^hacker@selfhost1:.*$', '', content, flags=re.MULTILINE)

    # Collapse multiple blank lines to max two
    content = re.sub(r'\n{3,}', '\n\n', content)

    # Strip leading/trailing whitespace
    content = content.strip()

    # Write cleaned content
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(content)

    print(f"Processed: {input_path} -> {output_path}")


def batch_clean_md_files():
    input_dir = '/var/www/html/professional-site/content/post'
    assets_dir = '/var/www/html/professional-site/assets/summaries'
    static_dir = '/var/www/html/professional-site/static/summaries'

    os.makedirs(assets_dir, exist_ok=True)
    os.makedirs(static_dir, exist_ok=True)

    for filename in os.listdir(input_dir):
        if filename.endswith('.md'):
            input_path = os.path.join(input_dir, filename)
            output_filename = filename.replace('.md', '.txt')

            output_path_assets = os.path.join(assets_dir, output_filename)
            output_path_static = os.path.join(static_dir, output_filename)

            clean_md_file(input_path, output_path_assets)

            # Also copy the cleaned file to the static folder
            shutil.copyfile(output_path_assets, output_path_static)
            print(f"Copied summary to static: {output_path_static}")


batch_clean_md_files()
12  initiator/check-log  Executable file
@@ -0,0 +1,12 @@
#!/bin/bash

LOG_PATH="/var/log/sync-to-pro.log"

if [ -f "$LOG_PATH" ]; then
    echo "📄 Showing contents of $LOG_PATH:"
    cat "$LOG_PATH"
else
    echo "⚠️ Log file not found: $LOG_PATH"
    exit 1
fi
71  initiator/initiate  Executable file
@@ -0,0 +1,71 @@
#!/bin/bash

LOG_FILE="/var/log/hugo_sync.log"
SYNC_TO_PRO_LOG="/var/log/sync-to-pro.log"

# Run sync-to-pro first every time, with logging
echo "🔄 Running sync-to-pro to update the log... (logged to $SYNC_TO_PRO_LOG)"
/home/arul/auto-scripts/professional-site-scripts/sync-content-pro-site/sync-to-pro >> "$SYNC_TO_PRO_LOG" 2>&1

# Function for the deployment chain
run_deployment_chain() {
    echo "Changing directory to working path..."
    cd /home/arul/auto-scripts/professional-site-scripts || {
        echo "❌ Failed to cd to working directory."
        exit 1
    }

    echo "1. 🚀 Generating summaries..."
    python3 generate-summaries/summarize.py

    echo "2. 📤 Syncing contents from personal site to professional site..."
    sync-content-pro-site/sync-to-pro

    echo "3. 🧐 Checking sync logs..."
    sync-content-pro-site/check-sync-logs

    echo "4. 🗺️ Generating redirect map..."
    python3 redirect-site-upload-map/generate-nginx-map.py

    echo "5. ☁️ Uploading map via SFTP..."
    bash redirect-site-upload-map/upload-map.sftp.sh

    echo "6. 🔄 Rebuilding the Hugo professional site to publish the latest contents on the local home server 🏠📰"
    cd /var/www/html/professional-site/ && sudo hugo

    echo "7. 🔃 Reloading the reverse proxy on the remote tunnel server"
    cd /home/arul/auto-scripts/professional-site-scripts && bash sync-content-pro-site/tunnel-reverseproxy-restart

    echo "🎉 Deployment chain completed"
}

# Check the log for the latest sync status
if [ ! -f "$LOG_FILE" ]; then
    echo "❌ Log file not found: $LOG_FILE"
    exit 1
fi

LATEST_TIMESTAMP=$(tac "$LOG_FILE" | grep -m 1 -oP '^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')

if [ -z "$LATEST_TIMESTAMP" ]; then
    echo "❌ Could not find any timestamp in the log."
    exit 1
fi

LOG_BLOCK=$(awk -v ts="$LATEST_TIMESTAMP" '{
    if ($0 ~ ts) start=1
    if (start) print
}' "$LOG_FILE")

if echo "$LOG_BLOCK" | grep -q "✅ Synced files:"; then
    echo "🔁 Changes detected in latest log block. Proceeding with deployment."
    run_deployment_chain
    exit 0
elif echo "$LOG_BLOCK" | grep -q "☑️ Already in sync. No changes needed."; then
    echo "✅ No changes detected in latest log block. Skipping deployment."
    exit 1
else
    echo "⚠️ Could not determine sync status from latest log block."
    exit 1
fi
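
For reference, the timestamp regex above matches the asctime prefix produced by the %(asctime)s [%(levelname)s] %(message)s formatter configured in sync-content-pro-site/sync-to-pro (later in this commit). A hedged example of log lines the script would classify as "changes detected" (timestamps and file name are illustrative):

2025-07-01 12:00:00,123 [INFO] Synced: git-tutorial-part1.md
2025-07-01 12:00:00,124 [INFO] ✅ Synced files: git-tutorial-part1.md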
57  redirect-site-upload-map/generate-nginx-map.py  Normal file
@@ -0,0 +1,57 @@
import os
import yaml

VALID_TAGS = {"Tech", "Tech tutorials"}
CONTENT_DIR = "/var/www/html/arulbalaji.xyz/content/journal"
OUTPUT_DIR = os.path.join(os.path.dirname(__file__), "redirect-slugs")
OUTPUT_FILE = os.path.join(OUTPUT_DIR, "tech-blogs.map")


def extract_tags_and_slug(filepath):
    print(f"Processing file: {filepath}")
    with open(filepath, "r", encoding="utf-8") as f:
        lines = f.readlines()

    if not lines or lines[0].strip() != "---":
        print(" - No front matter found.")
        return None

    front_matter = []
    for line in lines[1:]:
        if line.strip() == "---":
            break
        front_matter.append(line)

    try:
        data = yaml.safe_load(''.join(front_matter))
        tags = set(map(str.strip, data.get("tags", [])))
        print(f" - Tags found: {tags}")
        if tags & VALID_TAGS:
            slug = data.get("slug", os.path.splitext(os.path.basename(filepath))[0])
            print(f" - Matched tags! Returning slug: /journal/{slug}")
            return f"/journal/{slug}"
        else:
            print(" - No matching tags.")
    except Exception as e:
        print(f"Error parsing {filepath}: {e}")
    return None


def generate_map():
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    entries = []
    for filename in os.listdir(CONTENT_DIR):
        if filename.endswith(".md"):
            path = os.path.join(CONTENT_DIR, filename)
            result = extract_tags_and_slug(path)
            if result:
                entries.append(f"{result} 1;")

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        f.write("# Auto-generated by Python\n")
        for entry in sorted(entries):
            f.write(entry + "\n")

    print(f"✅ Generated NGINX map at {OUTPUT_FILE} with {len(entries)} entries.")


if __name__ == "__main__":
    generate_map()
18  redirect-site-upload-map/redirect-slugs/tech-blogs.map  Normal file
@@ -0,0 +1,18 @@
# Auto-generated by Python
/journal/broken_phone_server 1;
/journal/cyberdeck-build-prototype-2025 1;
/journal/cyberdeck_upgrade-1 1;
/journal/git-tutorial-part1 1;
/journal/git-tutorial-part2 1;
/journal/guacamole-setup-part1 1;
/journal/java_RestApi_Server 1;
/journal/java_RestApi_Server-part2 1;
/journal/java_RestApi_Server-part3 1;
/journal/learning_Webapp_Exploitation 1;
/journal/no_ui_project 1;
/journal/no_ui_youtube_subscribe 1;
/journal/self_hosting_platforms 1;
/journal/selfhosted_mail_server_issues 1;
/journal/selfhosting_on_Premise 1;
/journal/tor-router 1;
/journal/youtube-restriction-bypass 1;
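
These entries are key/value pairs for an NGINX map. A minimal sketch of how the VPS side might consume the uploaded file (the variable name is an assumption, not part of this commit):

# Hypothetical NGINX config on the VPS, assuming the upload path used below
map $uri $is_tech_blog {
    default 0;
    include /root/redirect-slugs/tech-blogs.map;
}

A matching rule such as if ($is_tech_blog) { return 301 ...; } in the server block would then redirect the listed slugs, though that server block is not part of this commit.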
13  redirect-site-upload-map/upload-map.sftp.sh  Executable file
@@ -0,0 +1,13 @@
#!/bin/bash

LOCAL_FILE="/home/arul/auto-scripts/professional-site-scripts/redirect-site-upload-map/redirect-slugs/tech-blogs.map"
REMOTE_USER="root"
REMOTE_HOST="74.208.74.61"
REMOTE_FILE="/root/redirect-slugs/tech-blogs.map"
KEY_PATH="/home/arul/.ssh/id_rsa"

sftp -i "$KEY_PATH" ${REMOTE_USER}@${REMOTE_HOST} <<EOF
put ${LOCAL_FILE} ${REMOTE_FILE}
bye
EOF
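
Note that sftp's put does not create missing directories, so /root/redirect-slugs must already exist on the VPS. A hedged one-time setup using the same key and host (assumption, not part of this commit):

ssh -i /home/arul/.ssh/id_rsa root@74.208.74.61 'mkdir -p /root/redirect-slugs'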
27  sync-content-pro-site/check-sync-logs  Executable file
@@ -0,0 +1,27 @@
#!/bin/bash

LOG_FILE="/var/log/hugo_sync.log"

echo "🔍 Checking Hugo Sync Log: $LOG_FILE"

# Check if the log file exists
if [[ ! -f "$LOG_FILE" ]]; then
    echo "❌ Log file not found."
    exit 1
fi

echo ""
echo "🗂️ Recent Sync Summary:"
echo "--------------------------"

# Show sync entries among the last 10 log lines
tail -n 10 "$LOG_FILE" | grep 'Synced:' || echo "No recent sync entries."

echo ""
echo "📅 Last Sync Time:"
stat -c "Last modified: %y" "$LOG_FILE"

echo ""
echo "📈 Total sync entries so far:"
grep -c 'Synced:' "$LOG_FILE"
237  sync-content-pro-site/sync-to-pro  Executable file
@@ -0,0 +1,237 @@
#!/usr/bin/env python3

import os
import re
import shutil
import logging
from pathlib import Path
import pwd
import grp
import filecmp

# ==== CONFIG ====
ARUL_SPACE_CONTENT = "/var/www/html/arulbalaji.xyz/content/journal"
PRO_CONTENT = "/var/www/html/professional-site/content/post"
ARUL_SPACE_MEDIA = "/var/www/html/arulbalaji.xyz/static/media"
PRO_MEDIA = "/var/www/html/professional-site/static/media"
SUMMARY_FOLDER = "/var/www/html/professional-site/assets/summaries"
LOG_FILE = "/var/log/hugo_sync.log"
VALID_TAGS = {"Tech", "Tech tutorials"}
OWNER_USER = "arul"  # Set to None to skip chown

# ==== LOGGER SETUP ====
logger = logging.getLogger("SyncLogger")
logger.setLevel(logging.INFO)

formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s')

file_handler = logging.FileHandler(LOG_FILE)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

# ==== HELPERS ====

def parse_frontmatter(lines):
    front = {}
    in_frontmatter = False
    body_start = 0
    for i, line in enumerate(lines):
        if line.strip() == "---":
            if not in_frontmatter:
                in_frontmatter = True
            else:
                body_start = i + 1
                break
        elif in_frontmatter and ':' in line:
            key, value = line.split(':', 1)
            front[key.strip()] = value.strip().strip('"')
    return front, body_start

def extract_meta_comments(lines):
    description = None
    featured = None
    for line in lines:
        if "#pro-site description:" in line:
            description = line.split(":", 1)[1].strip()
        if "#pro-site featured_image:" in line:
            featured = line.split(":", 1)[1].strip()
    return description, featured

def has_valid_tags(front):
    tags_line = front.get("tags", "")
    tags = re.findall(r'"(.*?)"', tags_line)
    return any(tag in VALID_TAGS for tag in tags)

def chown_to_user(file_path, username):
    if not username:
        return
    try:
        uid = pwd.getpwnam(username).pw_uid
        gid = grp.getgrnam(username).gr_gid
        os.chown(file_path, uid, gid)
    except Exception as e:
        logger.warning(f"Could not chown {file_path}: {e}")

def strip_shortcodes(content):
    return re.sub(r"{{<[^>]+>}}", "", content)

def files_are_identical(file1, file2):
    try:
        if os.path.getsize(file1) != os.path.getsize(file2):
            return False
        return filecmp.cmp(file1, file2, shallow=False)
    except Exception as e:
        logger.warning(f"Error comparing files {file1} and {file2}: {e}")
        return False

# ==== TRANSFORMATION ====

def transform_file(file_path):
    try:
        with open(file_path, "r") as f:
            lines = f.readlines()
    except PermissionError:
        logger.error(f"Permission denied: {file_path}")
        return None, None

    # Skip hidden files and files inside hidden directories
    if any(part.startswith('.') for part in file_path.parts):
        return None, None

    front, body_start = parse_frontmatter(lines)
    if not has_valid_tags(front):
        return None, None

    description, featured_image = extract_meta_comments(lines)
    title = front.get("title", "Untitled")
    raw_date = front.get("date", "1970-01-01")
    formatted_date = f"{raw_date}T00:00:00+05:30"

    body = "".join(lines[body_start:]).strip()

    new_frontmatter = f"""---
date: {formatted_date}
description: "{description or ''}"
featured_image: "{featured_image or ''}"
title: "{title}"
---
"""

    full_markdown = new_frontmatter + "\n\n" + body
    plain_text_summary = strip_shortcodes(body).strip()

    return full_markdown, plain_text_summary

# ==== MEDIA FOLDER COMPARISON ====

def are_folders_identical(folder1, folder2):
    def get_all_files(base_folder):
        file_set = set()
        for root, _, files in os.walk(base_folder):
            for f in files:
                full_path = os.path.join(root, f)
                rel_path = os.path.relpath(full_path, base_folder)
                file_set.add(rel_path)
        return file_set

    if not os.path.exists(folder1) or not os.path.exists(folder2):
        return False

    files1 = get_all_files(folder1)
    files2 = get_all_files(folder2)

    if files1 != files2:
        return False

    for rel_file in files1:
        file1 = os.path.join(folder1, rel_file)
        file2 = os.path.join(folder2, rel_file)
        if not files_are_identical(file1, file2):
            return False

    return True

# ==== MAIN SYNC CHECK ====

def check_already_synced():
    for md_file in Path(ARUL_SPACE_CONTENT).rglob("*.md"):
        transformed, _ = transform_file(md_file)
        if not transformed:
            continue
        pro_file_path = Path(PRO_CONTENT) / md_file.name
        if not pro_file_path.exists():
            return False
        try:
            with open(pro_file_path, "r") as f:
                pro_content = f.read()
            if pro_content != transformed:
                return False
        except Exception as e:
            logger.warning(f"Could not read {pro_file_path}: {e}")
            return False
    return True

# ==== MEDIA SYNC ====

def copy_media_folder():
    if not os.path.exists(ARUL_SPACE_MEDIA):
        logger.warning(f"Media folder not found in Arul's space at {ARUL_SPACE_MEDIA}")
        return
    try:
        if os.path.exists(PRO_MEDIA):
            shutil.rmtree(PRO_MEDIA)
        shutil.copytree(ARUL_SPACE_MEDIA, PRO_MEDIA, dirs_exist_ok=True)
        if OWNER_USER:
            for root, dirs, files in os.walk(PRO_MEDIA):
                for name in files:
                    chown_to_user(os.path.join(root, name), OWNER_USER)
        logger.info("📁 Media folder synced from Arul's space → professional site")
    except Exception as e:
        logger.error(f"Failed to sync media folder from Arul's space: {e}")

# ==== MAIN SYNC ====

def sync_markdowns():
    synced = []
    Path(PRO_CONTENT).mkdir(parents=True, exist_ok=True)
    Path(SUMMARY_FOLDER).mkdir(parents=True, exist_ok=True)

    for md_file in Path(ARUL_SPACE_CONTENT).rglob("*.md"):
        try:
            transformed, summary = transform_file(md_file)
            if transformed:
                out_path = Path(PRO_CONTENT) / md_file.name
                with open(out_path, "w") as out_file:
                    out_file.write(transformed)
                if OWNER_USER:
                    chown_to_user(out_path, OWNER_USER)
                synced.append(str(md_file.name))

                summary_path = Path(SUMMARY_FOLDER) / (md_file.stem + ".txt")
                with open(summary_path, "w") as sum_file:
                    sum_file.write(summary)
                if OWNER_USER:
                    chown_to_user(summary_path, OWNER_USER)
        except Exception as e:
            logger.error(f"Error processing {md_file.name}: {e}")
    return synced

# ==== EXECUTION ====

if __name__ == "__main__":
    if check_already_synced() and are_folders_identical(ARUL_SPACE_MEDIA, PRO_MEDIA):
        logger.info("☑️ Already in sync. No changes needed.")
    else:
        synced_files = sync_markdowns()
        copy_media_folder()

        if synced_files:
            for f in synced_files:
                logger.info(f"Synced: {f}")
            logger.info(f"✅ Synced files: {', '.join(synced_files)}")
        else:
            logger.info("☑️ No new valid tech/tutorial markdowns to sync.")
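
For context, a hedged sketch of a journal post that transform_file would accept (the file contents are illustrative; the quoted tag list and the #pro-site comment lines follow the parsers above):

---
title: "Example post"
date: 2025-07-01
tags: ["Tech"]
---
#pro-site description: One-line summary for the professional site
#pro-site featured_image: /media/example.png

Body of the post goes here.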
15  sync-content-pro-site/tunnel-reverseproxy-restart  Executable file
@@ -0,0 +1,15 @@
#!/bin/bash

ssh -i /home/arul/.ssh/id_rsa root@74.208.74.61 <<EOF
if sudo /usr/local/nginx/sbin/nginx -t; then
    echo "✅ Nginx config test passed, reloading..."
    if sudo /usr/local/nginx/sbin/nginx -s reload; then
        echo "🚀 Nginx reloaded successfully!"
    else
        echo "❌ Nginx reload failed!"
    fi
else
    echo "❌ Nginx config test failed!"
fi
EOF