import os
import re
import shutil


def clean_markdown_text(content):
    """Return *content* cleaned into a plain-text summary.

    Strips the YAML frontmatter, truncates at the Hugo comments shortcode,
    removes all remaining shortcodes and shell-prompt transcript lines, and
    collapses excess blank lines.  Pure text transform (no filesystem access)
    so it can be unit-tested independently of clean_md_file.
    """
    # Remove YAML frontmatter (the leading block delimited by --- lines).
    content = re.sub(r'^---\n(.*?\n)*?---\n', '', content, flags=re.DOTALL)

    # Truncate at the comments shortcode BEFORE the generic shortcode pass:
    # the generic pass would otherwise erase the "{{< comments >}}" marker
    # and this truncation could never match (it was dead code before).
    content = re.sub(r'{{<\s*comments\s*>}}.*', '', content, flags=re.DOTALL)

    # Remove all remaining Hugo shortcodes like {{< ... >}} or {{< /... >}}.
    content = re.sub(r'{{<[^>]*>}}', '', content)

    # Remove shell prompt lines (pasted terminal transcripts).
    content = re.sub(r'^hacker@selfhost1:.*$', '', content, flags=re.MULTILINE)

    # Collapse runs of three or more newlines down to a single blank line.
    content = re.sub(r'\n{3,}', '\n\n', content)

    return content.strip()


def clean_md_file(input_path, output_path):
    """Read the markdown at *input_path*, clean it, and write *output_path*."""
    with open(input_path, 'r', encoding='utf-8') as f:
        content = f.read()

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(clean_markdown_text(content))

    print(f"Processed: {input_path} -> {output_path}")


def batch_clean_md_files():
    """Clean every .md post into a .txt summary under assets/ and static/."""
    input_dir = '/var/www/html/professional-site/content/post'
    assets_dir = '/var/www/html/professional-site/assets/summaries'
    static_dir = '/var/www/html/professional-site/static/summaries'

    os.makedirs(assets_dir, exist_ok=True)
    os.makedirs(static_dir, exist_ok=True)

    for filename in os.listdir(input_dir):
        if not filename.endswith('.md'):
            continue
        input_path = os.path.join(input_dir, filename)
        # Swap only the trailing extension; str.replace would also corrupt
        # any ".md" occurring mid-filename.
        output_filename = filename[:-len('.md')] + '.txt'

        output_path_assets = os.path.join(assets_dir, output_filename)
        output_path_static = os.path.join(static_dir, output_filename)

        clean_md_file(input_path, output_path_assets)

        # Mirror the cleaned summary into static/ so Hugo serves it as-is.
        shutil.copyfile(output_path_assets, output_path_static)
        print(f"Copied summary to static: {output_path_static}")


if __name__ == "__main__":
    # Guarded so importing this module (e.g. for testing) has no side effects.
    batch_clean_md_files()
πŸ”„ Restarting Hugo Professional site to update latest contents on Local Home Server πŸ πŸ“°" + cd /var/www/html/professional-site/ && sudo hugo + + echo "7. πŸ”ƒ Restarting reverse proxy tunnel in remote tunnel" + cd /home/arul/auto-scripts/professional-site-scripts && bash sync-content-pro-site/tunnel-reverseproxy-restart + + echo "πŸŽ‰ Deployment chain completed" +} + +# Check log for latest sync status +if [ ! -f "$LOG_FILE" ]; then + echo "❌ Log file not found: $LOG_FILE" + exit 1 +fi + +LATEST_TIMESTAMP=$(tac "$LOG_FILE" | grep -m 1 -oP '^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}' | head -1) + +if [ -z "$LATEST_TIMESTAMP" ]; then + echo "❌ Could not find any timestamp in the log." + exit 1 +fi + +LOG_BLOCK=$(awk -v ts="$LATEST_TIMESTAMP" '{ + if ($0 ~ ts) start=1 + if (start) print +}' "$LOG_FILE") + +if echo "$LOG_BLOCK" | grep -q "βœ… Synced files:"; then + echo "πŸ” Changes detected in latest log block. Proceeding with deployment." + run_deployment_chain + exit 0 +elif echo "$LOG_BLOCK" | grep -q "β˜‘οΈ Already in sync. No changes needed."; then + echo "βœ… No changes detected in latest log block. Skipping deployment." + exit 1 +else + echo "⚠️ Could not determine sync status from latest log block." 
+ exit 1 +fi + diff --git a/redirect-site-upload-map/generate-nginx-map.py b/redirect-site-upload-map/generate-nginx-map.py new file mode 100644 index 0000000..a6150c3 --- /dev/null +++ b/redirect-site-upload-map/generate-nginx-map.py @@ -0,0 +1,57 @@ +import os +import yaml + +VALID_TAGS = {"Tech", "Tech tutorials"} +CONTENT_DIR = "/var/www/html/arulbalaji.xyz/content/journal" +OUTPUT_DIR = os.path.join(os.path.dirname(__file__), "redirect-slugs") +OUTPUT_FILE = os.path.join(OUTPUT_DIR, "tech-blogs.map") + +def extract_tags_and_slug(filepath): + print(f"Processing file: {filepath}") + with open(filepath, "r", encoding="utf-8") as f: + lines = f.readlines() + + if not lines or lines[0].strip() != "---": + print(" - No front matter found.") + return None + + front_matter = [] + for line in lines[1:]: + if line.strip() == "---": + break + front_matter.append(line) + + try: + data = yaml.safe_load(''.join(front_matter)) + tags = set(map(str.strip, data.get("tags", []))) + print(f" - Tags found: {tags}") + if tags & VALID_TAGS: + slug = data.get("slug", os.path.splitext(os.path.basename(filepath))[0]) + print(f" - Matched tags! 
Returning slug: /journal/{slug}") + return f"/journal/{slug}" + else: + print(" - No matching tags.") + except Exception as e: + print(f"Error parsing {filepath}: {e}") + return None + +def generate_map(): + os.makedirs(OUTPUT_DIR, exist_ok=True) + entries = [] + for filename in os.listdir(CONTENT_DIR): + if filename.endswith(".md"): + path = os.path.join(CONTENT_DIR, filename) + result = extract_tags_and_slug(path) + if result: + entries.append(f"{result} 1;") + + with open(OUTPUT_FILE, "w", encoding="utf-8") as f: + f.write("# Auto-generated by Python\n") + for entry in sorted(entries): + f.write(entry + "\n") + + print(f"βœ… Generated NGINX map at {OUTPUT_FILE} with {len(entries)} entries.") + +if __name__ == "__main__": + generate_map() + diff --git a/redirect-site-upload-map/redirect-slugs/tech-blogs.map b/redirect-site-upload-map/redirect-slugs/tech-blogs.map new file mode 100644 index 0000000..33156a2 --- /dev/null +++ b/redirect-site-upload-map/redirect-slugs/tech-blogs.map @@ -0,0 +1,18 @@ +# Auto-generated by Python +/journal/broken_phone_server 1; +/journal/cyberdeck-build-prototype-2025 1; +/journal/cyberdeck_upgrade-1 1; +/journal/git-tutorial-part1 1; +/journal/git-tutorial-part2 1; +/journal/guacamole-setup-part1 1; +/journal/java_RestApi_Server 1; +/journal/java_RestApi_Server-part2 1; +/journal/java_RestApi_Server-part3 1; +/journal/learning_Webapp_Exploitation 1; +/journal/no_ui_project 1; +/journal/no_ui_youtube_subscribe 1; +/journal/self_hosting_platforms 1; +/journal/selfhosted_mail_server_issues 1; +/journal/selfhosting_on_Premise 1; +/journal/tor-router 1; +/journal/youtube-restriction-bypass 1; diff --git a/redirect-site-upload-map/upload-map.sftp.sh b/redirect-site-upload-map/upload-map.sftp.sh new file mode 100755 index 0000000..cfdbf04 --- /dev/null +++ b/redirect-site-upload-map/upload-map.sftp.sh @@ -0,0 +1,13 @@ +#!/bin/bash + 
def files_are_identical(file1, file2):
    """Byte-compare two files; any I/O error is treated as 'differs'."""
    try:
        # Cheap size check first; filecmp then does a full content compare.
        if os.path.getsize(file1) != os.path.getsize(file2):
            return False
        return filecmp.cmp(file1, file2, shallow=False)
    except Exception as e:
        logger.warning(f"Error comparing files {file1} and {file2}: {e}")
        return False

# ==== TRANSFORMATION ====

def transform_file(file_path):
    """Build the professional-site rendering of one markdown post.

    Returns (full_markdown, plain_text_summary), or (None, None) when the
    path is hidden, unreadable, or its front matter lacks the valid tags.
    """
    # Reject hidden files/directories before doing any I/O on them.
    if any(part.startswith('.') for part in file_path.parts):
        return None, None

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except PermissionError:
        logger.error(f"Permission denied: {file_path}")
        return None, None

    front, body_start = parse_frontmatter(lines)
    if not has_valid_tags(front):
        return None, None

    description, featured_image = extract_meta_comments(lines)
    title = front.get("title", "Untitled")
    raw_date = front.get("date", "1970-01-01")
    # Assumes raw_date is a bare YYYY-MM-DD string — TODO confirm upstream.
    formatted_date = f"{raw_date}T00:00:00+05:30"

    body = "".join(lines[body_start:]).strip()

    # NOTE(review): values are interpolated unescaped — a double quote inside
    # the description or title would produce invalid YAML; confirm inputs.
    new_frontmatter = f"""---
date: {formatted_date}
description: "{description or ''}"
featured_image: "{featured_image or ''}"
title: "{title}"
---
"""

    full_markdown = new_frontmatter + "\n\n" + body
    plain_text_summary = strip_shortcodes(body).strip()

    return full_markdown, plain_text_summary

# ==== MEDIA FOLDER COMPARISON ====

def are_folders_identical(folder1, folder2):
    """True when both folders exist and hold byte-identical file trees."""
    def relative_files(base_folder):
        found = set()
        for root, _, files in os.walk(base_folder):
            for name in files:
                found.add(os.path.relpath(os.path.join(root, name), base_folder))
        return found

    if not os.path.exists(folder1) or not os.path.exists(folder2):
        return False

    files1 = relative_files(folder1)
    files2 = relative_files(folder2)

    # Different file sets means different trees; no need to compare contents.
    if files1 != files2:
        return False

    return all(
        files_are_identical(os.path.join(folder1, rel), os.path.join(folder2, rel))
        for rel in files1
    )

# ==== MAIN SYNC CHECK ====

def check_already_synced():
    """True when every eligible source post already matches its synced copy."""
    for md_file in Path(ARUL_SPACE_CONTENT).rglob("*.md"):
        transformed, _ = transform_file(md_file)
        if not transformed:
            continue
        pro_file_path = Path(PRO_CONTENT) / md_file.name
        if not pro_file_path.exists():
            return False
        try:
            with open(pro_file_path, "r", encoding="utf-8") as f:
                if f.read() != transformed:
                    return False
        except Exception as e:
            # Unreadable destination: assume out of sync so the sync runs.
            logger.warning(f"Could not read {pro_file_path}: {e}")
            return False
    return True

# ==== MEDIA SYNC ====

def copy_media_folder():
    """Mirror the media folder into the professional site (full replace)."""
    if not os.path.exists(ARUL_SPACE_MEDIA):
        logger.warning(f"Media folder not found in Arul's space at {ARUL_SPACE_MEDIA}")
        return
    try:
        # Remove the destination first so deletions propagate too.
        if os.path.exists(PRO_MEDIA):
            shutil.rmtree(PRO_MEDIA)
        shutil.copytree(ARUL_SPACE_MEDIA, PRO_MEDIA, dirs_exist_ok=True)
        if OWNER_USER:
            # NOTE(review): only files are re-owned, not directories — confirm
            # whether directory ownership matters for the web server.
            for root, dirs, files in os.walk(PRO_MEDIA):
                for name in files:
                    chown_to_user(os.path.join(root, name), OWNER_USER)
        logger.info("πŸ“ Media folder synced from Arul's space β†’ professional site")
    except Exception as e:
        logger.error(f"Failed to sync media folder from Arul's space: {e}")

# ==== MAIN SYNC ====

def sync_markdowns():
    """Transform and copy every eligible post; return the synced filenames."""
    synced = []
    Path(PRO_CONTENT).mkdir(parents=True, exist_ok=True)
    Path(SUMMARY_FOLDER).mkdir(parents=True, exist_ok=True)

    for md_file in Path(ARUL_SPACE_CONTENT).rglob("*.md"):
        try:
            transformed, summary = transform_file(md_file)
            if not transformed:
                continue
            out_path = Path(PRO_CONTENT) / md_file.name
            with open(out_path, "w", encoding="utf-8") as out_file:
                out_file.write(transformed)
            if OWNER_USER:
                chown_to_user(out_path, OWNER_USER)
            synced.append(str(md_file.name))

            # Matching plain-text summary alongside the synced markdown.
            summary_path = Path(SUMMARY_FOLDER) / (md_file.stem + ".txt")
            with open(summary_path, "w", encoding="utf-8") as sum_file:
                sum_file.write(summary)
            if OWNER_USER:
                chown_to_user(summary_path, OWNER_USER)
        except Exception as e:
            logger.error(f"Error processing {md_file.name}: {e}")
    return synced

# ==== EXECUTION ====

if __name__ == "__main__":
    if check_already_synced() and are_folders_identical(ARUL_SPACE_MEDIA, PRO_MEDIA):
        logger.info("β˜‘οΈ Already in sync. No changes needed.")
    else:
        synced_files = sync_markdowns()
        copy_media_folder()

        if synced_files:
            for f in synced_files:
                logger.info(f"Synced: {f}")
            logger.info(f"βœ… Synced files: {', '.join(synced_files)}")
        else:
            logger.info("β˜‘οΈ No new valid tech/tutorial markdowns to sync.")