commit 76ed3330cc957de3c1c50bd230b3000dd2a69a44
Author: arul
Date:   Fri Dec 12 02:27:50 2025 +0530

    mirroring code to mirror the public repos from gitea to github

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a037404
--- /dev/null
+++ b/README.md
@@ -0,0 +1,204 @@
+# GitHub Contribution Heatmap Updater (Mirror & LFS)
+
+This repository contains scripts to:
+
+1. Mirror repositories from Gitea to GitHub.
+2. Rewrite commit emails to your GitHub account for heatmap updates.
+3. Handle large/LFS-based repositories that fail normal mirroring.
+
+---
+
+## Full Workflow
+
+### Step 1: Mirror normal repositories from Gitea to GitHub
+
+Run the main mirroring script to copy all normal repositories from your Gitea account to GitHub.
+
+```bash
+python gitea-github-mirror.py
+```
+
+**What it does:**
+
+* Fetches all public repositories from Gitea.
+* Creates corresponding repositories on GitHub (if they don’t already exist).
+* Mirrors all branches and tags to GitHub.
+
+> ⚠️ If some repositories fail during this step (typically large or LFS-based repos), they will be handled in later steps.
+
+---
+
+### Step 2: Update contribution graph for successfully mirrored repositories
+
+After mirroring, update commits and push to GitHub to reflect contributions on your heatmap.
+
+```bash
+python mirror-contribution-graph.py
+```
+
+**Example:**
+
+```bash
+python mirror-contribution-graph.py
+```
+
+**What it does:**
+
+1. Fetches the latest commits from the local mirrored repo.
+2. Rewrites all commit author emails to your GitHub email.
+3. Pushes all branches and tags to GitHub, updating your contribution heatmap.
+
+> ⚠️ This step is for **normal (non-LFS) repositories**.
+
+---
+
+### Step 3: Push failed/LFS repositories
+
+For repositories that failed in Step 1 (usually large/LFS-based), use the `lfs-push-repo.sh` script.
+
+**Run:**
+
+```bash
+bash lfs-push-repo.sh <repo_name> <gitea_url> <github_url>
+```
+
+**Example:**
+
+```bash
+bash lfs-push-repo.sh solo-level-app-automation https://gitea.domain.com/user/project.git https://github.com/user/project.git
+```
+
+**Arguments:**
+
+* `<repo_name>`: Repository name.
+* `<gitea_url>`: Original repository URL on Gitea.
+* `<github_url>`: GitHub repository URL already created by the mirroring script.
+
+> ✅ This ensures LFS files and history are pushed to the GitHub repository created in Step 1.
+
+---
+
+### Step 4: Update contribution graph for LFS repositories
+
+After pushing the LFS repository, rewrite commits to your GitHub email for the heatmap.
+
+**Run:**
+
+```bash
+python mirror-lfs-contribution-graph.py
+```
+
+**Example:**
+
+```bash
+python mirror-lfs-contribution-graph.py   # takes no arguments: processes every repo under ./git_repo_project_files/
+```
+
+**What it does:**
+
+1. Fetches the latest commits from the local LFS repo.
+2. Rewrites all commit emails to your GitHub email.
+3. Pushes all branches and tags to GitHub, updating your contribution heatmap.
+
+---
+
+## Configuration
+
+Set GitHub credentials inside the Python scripts:
+
+```python
+GITHUB_USER = ""
+GITHUB_EMAIL = ""
+GITHUB_TOKEN = ""
+```
+
+* The PAT must have **repo permissions** to push commits.
+
+---
+
+## Logs
+
+* Each run generates a timestamped log file.
+
+  * Example: `mirror_log_20251212_001638.log`
+* Logs include:
+
+  * Repositories processed
+  * Commit rewriting info
+  * Push status
+  * Any errors
+
+---
+
+## Notes
+
+* Install `git-lfs`
+
+```bash
+sudo apt update
+sudo apt install git-lfs
+git lfs install
+```
+
+* Install `git-filter-repo` for Python scripts:
+
+```bash
+pip install git-filter-repo
+```
+
+* Commits keep their timestamps; only the emails recorded in them are rewritten.
+* Pushing with `--mirror` overwrites remote branches/tags — **use carefully**.
+
+---
+
+## Example Directory Layout
+
+```
+project-root/
+│
+├─ gitea-github-mirror.py
+├─ mirror-contribution-graph.py
+├─ lfs-push-repo.sh
+├─ mirror-lfs-contribution-graph.py
+├─ gitea_repos/                 # bare mirror clones of the normal repos (no working tree)
+│  ├─ repo1
+│  ├─ repo2
+│  └─ ...
+└─ git_repo_project_files/      # LFS / working repos
+   ├─ lfs-repo1
+   ├─ lfs-repo2
+   └─ ...
+```
+
+---
+
+## Summary of Execution Order
+
+1. **Mirror normal repositories:**
+
+   ```bash
+   python gitea-github-mirror.py
+   ```
+
+2. **Update contribution graph for mirrored normal repos:**
+
+   ```bash
+   python mirror-contribution-graph.py
+   ```
+
+3. **Push failed/LFS repositories:**
+
+   ```bash
+   bash lfs-push-repo.sh <repo_name> <gitea_url> <github_url>
+   ```
+
+4. **Update contribution graph for LFS repositories:**
+
+   ```bash
+   python mirror-lfs-contribution-graph.py
+   ```
+
+> Following this order ensures **all normal and LFS-based repositories** are mirrored, commits rewritten, and contribution heatmap updated correctly.
+
+
+
diff --git a/gitea-github-mirror-santize.py b/gitea-github-mirror-santize.py
new file mode 100644
index 0000000..da65b64
--- /dev/null
+++ b/gitea-github-mirror-santize.py
@@ -0,0 +1,151 @@
+"""Mirror public Gitea repositories to GitHub, sanitizing descriptions."""
+
+import requests
+import subprocess
+import os
+import logging
+import re
+import unicodedata
+from datetime import datetime
+
+# ----------------- CONFIGURATION -----------------
+GITEA_URL = ""
+GITEA_TOKEN = ""
+GITEA_USER = ""
+
+GITHUB_TOKEN = ""
+GITHUB_USER = ""  # your GitHub username/org
+
+CLONE_DIR = "./gitea_repos"  # Bare mirror clones live here between runs
+HTTP_TIMEOUT = 30  # seconds; a stalled API call must not hang the run
+# -------------------------------------------------
+
+# Setup logging
+log_filename = f"mirror_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[
+        logging.FileHandler(log_filename),
+        logging.StreamHandler()
+    ]
+)
+
+headers_gitea = {"Authorization": f"token {GITEA_TOKEN}"}
+os.makedirs(CLONE_DIR, exist_ok=True)
+
+
+# ----------------- SANITIZER -----------------
+def sanitize_description(text: str) -> str:
+    """Return *text* as one line with no control characters.
+
+    Every character in Unicode category ``Cc`` (tab, LF and CR included)
+    becomes a space; whitespace runs then collapse to a single space and the
+    ends are trimmed. ``None`` or an empty string yields ``""``.
+    """
+    if not text:
+        return ""
+    spaced = "".join(" " if unicodedata.category(ch) == "Cc" else ch for ch in text)
+    return re.sub(r"\s+", " ", spaced).strip()
+# -------------------------------------------------
+
+
+# Step 1: Get all public Gitea repos
+def get_gitea_repos():
+    """Return every public repository of GITEA_USER, page by page.
+
+    Paging stops at the first empty page. On an HTTP or network error the
+    failure is logged and whatever was collected so far is returned.
+    """
+    logging.info("Fetching repositories from Gitea...")
+    repos = []
+    page = 1
+    while True:
+        url = f"{GITEA_URL}/api/v1/users/{GITEA_USER}/repos?page={page}&limit=100"
+        try:
+            r = requests.get(url, headers=headers_gitea, timeout=HTTP_TIMEOUT)
+        except requests.RequestException as e:
+            logging.error(f"Failed to fetch Gitea repos: {e}")
+            break
+        if r.status_code != 200:
+            logging.error(f"Failed to fetch Gitea repos: {r.text}")
+            break
+        data = r.json()
+        if not data:
+            break
+        # Only keep public repos
+        repos.extend(repo for repo in data if not repo.get("private", False))
+        page += 1
+    logging.info(f"Found {len(repos)} public repositories on Gitea.")
+    return repos
+
+
+# Step 2: Create GitHub repo
+def create_github_repo(repo_name, description=""):
+    """Create a public GitHub repo; return True if it exists afterwards.
+
+    A 422 "already exists" reply counts as success so the script can be
+    re-run. Any other reply, or a network error, is logged and returns False.
+    """
+    payload = {
+        "name": repo_name,
+        "private": False,
+        "description": sanitize_description(description),
+    }
+    headers = {"Authorization": f"token {GITHUB_TOKEN}"}
+    try:
+        r = requests.post(
+            "https://api.github.com/user/repos",
+            json=payload, headers=headers, timeout=HTTP_TIMEOUT,
+        )
+    except requests.RequestException as e:
+        logging.error(f"Failed to create GitHub repo {repo_name}: {e}")
+        return False
+    if r.status_code == 201:
+        logging.info(f"GitHub repo created: {repo_name}")
+        return True
+    if r.status_code == 422 and "already exists" in r.text:
+        logging.info(f"GitHub repo already exists: {repo_name}")
+        return True
+    logging.error(f"Failed to create GitHub repo {repo_name}: {r.status_code} - {r.text}")
+    return False
+
+
+# Step 3: Mirror from Gitea → GitHub
+def mirror_repo(repo_name, clone_url):
+    """Clone (or refresh) the bare mirror of one repo and push it to GitHub."""
+    local_path = os.path.join(CLONE_DIR, repo_name)
+    try:
+        if os.path.exists(local_path):
+            logging.info(f"Repo {repo_name} already cloned. Fetching updates...")
+            # "git clone --mirror" makes a bare repo: local_path IS the git
+            # dir, so there is no local_path/.git to point --git-dir at.
+            subprocess.run(["git", "--git-dir", local_path, "fetch", "--all"], check=True)
+        else:
+            logging.info(f"Cloning {repo_name} from Gitea...")
+            subprocess.run(["git", "clone", "--mirror", clone_url, local_path], check=True)
+
+        github_url = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{repo_name}.git"
+        logging.info(f"Pushing {repo_name} to GitHub...")
+        subprocess.run(["git", "--git-dir", local_path, "push", "--mirror", github_url], check=True)
+        logging.info(f"✅ Finished mirroring {repo_name}")
+
+    except subprocess.CalledProcessError as e:
+        # str(e) embeds the whole command line, token-bearing push URL
+        # included, so log only the exit status.
+        logging.error(f"❌ Error mirroring {repo_name}: git exited with status {e.returncode}")
+
+
+def main():
+    """Mirror every public Gitea repository to GitHub."""
+    for repo in get_gitea_repos():
+        repo_name = repo["name"]
+        logging.info(f"Processing repo: {repo_name}")
+        # A missing or null description becomes "".
+        if create_github_repo(repo_name, repo.get("description") or ""):
+            mirror_repo(repo_name, repo["clone_url"])
+
+
+if __name__ == "__main__":
+    main()
+    logging.info(f"All done! Log saved to {log_filename}")
diff --git a/gitea-github-mirror.py b/gitea-github-mirror.py
new file mode 100644
index 0000000..d47f2b8
--- /dev/null
+++ b/gitea-github-mirror.py
@@ -0,0 +1,134 @@
+"""Mirror public Gitea repositories to GitHub."""
+
+import requests
+import subprocess
+import os
+import logging
+from datetime import datetime
+
+# ----------------- CONFIGURATION -----------------
+GITEA_URL = ""
+GITEA_TOKEN = ""
+GITEA_USER = ""
+
+GITHUB_TOKEN = ""
+GITHUB_USER = ""  # your GitHub username/org
+
+CLONE_DIR = "./gitea_repos"  # Bare mirror clones live here between runs
+HTTP_TIMEOUT = 30  # seconds; a stalled API call must not hang the run
+# -------------------------------------------------
+
+# Setup logging
+log_filename = f"mirror_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[
+        logging.FileHandler(log_filename),
+        logging.StreamHandler()
+    ]
+)
+
+headers_gitea = {"Authorization": f"token {GITEA_TOKEN}"}
+os.makedirs(CLONE_DIR, exist_ok=True)
+
+
+# Step 1: Get all public Gitea repos
+def get_gitea_repos():
+    """Return every public repository of GITEA_USER, page by page.
+
+    Paging stops at the first empty page. On an HTTP or network error the
+    failure is logged and whatever was collected so far is returned.
+    """
+    logging.info("Fetching repositories from Gitea...")
+    repos = []
+    page = 1
+    while True:
+        url = f"{GITEA_URL}/api/v1/users/{GITEA_USER}/repos?page={page}&limit=100"
+        try:
+            r = requests.get(url, headers=headers_gitea, timeout=HTTP_TIMEOUT)
+        except requests.RequestException as e:
+            logging.error(f"Failed to fetch Gitea repos: {e}")
+            break
+        if r.status_code != 200:
+            logging.error(f"Failed to fetch Gitea repos: {r.text}")
+            break
+        data = r.json()
+        if not data:
+            break
+        # Only keep public repos
+        repos.extend(repo for repo in data if not repo.get("private", False))
+        page += 1
+    logging.info(f"Found {len(repos)} public repositories on Gitea.")
+    return repos
+
+
+# Step 2: Create GitHub repo
+def create_github_repo(repo_name, description=""):
+    """Create a public GitHub repo; return True if it exists afterwards.
+
+    A 422 "already exists" reply counts as success so the script can be
+    re-run. Any other reply, or a network error, is logged and returns False.
+    """
+    payload = {
+        "name": repo_name,
+        "private": False,
+        "description": description or "",
+    }
+    headers = {"Authorization": f"token {GITHUB_TOKEN}"}
+    try:
+        r = requests.post(
+            "https://api.github.com/user/repos",
+            json=payload, headers=headers, timeout=HTTP_TIMEOUT,
+        )
+    except requests.RequestException as e:
+        logging.error(f"Failed to create GitHub repo {repo_name}: {e}")
+        return False
+    if r.status_code == 201:
+        logging.info(f"GitHub repo created: {repo_name}")
+        return True
+    if r.status_code == 422 and "already exists" in r.text:
+        logging.info(f"GitHub repo already exists: {repo_name}")
+        return True
+    logging.error(f"Failed to create GitHub repo {repo_name}: {r.status_code} - {r.text}")
+    return False
+
+
+# Step 3: Mirror from Gitea → GitHub
+def mirror_repo(repo_name, clone_url):
+    """Clone (or refresh) the bare mirror of one repo and push it to GitHub."""
+    local_path = os.path.join(CLONE_DIR, repo_name)
+    try:
+        if os.path.exists(local_path):
+            logging.info(f"Repo {repo_name} already cloned. Fetching updates...")
+            # "git clone --mirror" makes a bare repo: local_path IS the git
+            # dir, so there is no local_path/.git to point --git-dir at.
+            subprocess.run(["git", "--git-dir", local_path, "fetch", "--all"], check=True)
+        else:
+            logging.info(f"Cloning {repo_name} from Gitea...")
+            subprocess.run(["git", "clone", "--mirror", clone_url, local_path], check=True)
+
+        github_url = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{repo_name}.git"
+        logging.info(f"Pushing {repo_name} to GitHub...")
+        subprocess.run(["git", "--git-dir", local_path, "push", "--mirror", github_url], check=True)
+        logging.info(f"✅ Finished mirroring {repo_name}")
+
+    except subprocess.CalledProcessError as e:
+        # str(e) embeds the whole command line, token-bearing push URL
+        # included, so log only the exit status.
+        logging.error(f"❌ Error mirroring {repo_name}: git exited with status {e.returncode}")
+
+
+def main():
+    """Mirror every public Gitea repository to GitHub."""
+    for repo in get_gitea_repos():
+        repo_name = repo["name"]
+        logging.info(f"Processing repo: {repo_name}")
+        # A missing or null description becomes "".
+        if create_github_repo(repo_name, repo.get("description") or ""):
+            mirror_repo(repo_name, repo["clone_url"])
+
+
+if __name__ == "__main__":
+    main()
+    logging.info(f"All done! Log saved to {log_filename}")
diff --git a/lfs-push-repo.sh b/lfs-push-repo.sh
new file mode 100644
index 0000000..7a52160
--- /dev/null
+++ b/lfs-push-repo.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+# Clone a repository from Gitea, migrate its large files to Git LFS
+# (rewriting history) and force-push the result to an existing GitHub repo.
+#
+# Needs bash (arrays, ${var/pat/repl}); plain "sh" may not support these.
+
+# ---------------- CONFIG ----------------
+PROJECTS_DIR="git_repo_project_files"        # Working clones
+LFS_EXTENSIONS=("*.so" "*.zip" "*.bin")       # Large files to track
+
+# ---------------- TOKEN ----------------
+# Export GITHUB_TOKEN before running, or put the token after ":-" below.
+GITHUB_TOKEN="${GITHUB_TOKEN:-}"
+
+# ---------------- VALIDATION ----------------
+if [ $# -lt 3 ]; then
+    echo "Usage: $0 <repo_name> <gitea_url> <github_url>"
+    echo "Example: $0 solo-level-app-automation https://gitea.arulbalaji.xyz/arul/solo-level-app-automation.git https://github.com/username/repo.git"
+    exit 1
+fi
+
+REPO_NAME="$1"
+GITEA_URL="$2"
+GITHUB_URL="$3"
+WORKDIR="$PROJECTS_DIR/$REPO_NAME"
+
+echo "🔹 Processing repository: $REPO_NAME"
+
+# ---------------- CREATE BASE DIR ----------------
+mkdir -p "$PROJECTS_DIR"
+
+# ---------------- CLONE / UPDATE ----------------
+if [ ! -d "$WORKDIR/.git" ]; then
+    echo "Cloning from Gitea..."
+    git clone "$GITEA_URL" "$WORKDIR" || { echo "❌ Clone failed"; exit 1; }
+else
+    echo "Working repo exists. Ensuring it's a proper checkout and pulling latest changes..."
+    cd "$WORKDIR" || { echo "❌ Failed to enter directory"; exit 1; }
+    git fetch origin
+    git fetch github 2>/dev/null
+    # Pull whichever branch is checked out; fall back to "main" if detached.
+    CURRENT_BRANCH="$(git symbolic-ref --quiet --short HEAD)"
+    git pull origin "${CURRENT_BRANCH:-main}" || echo "⚠️ Pull failed, continuing..."
+    cd - >/dev/null
+fi
+
+cd "$WORKDIR" || { echo "❌ Failed to cd into $WORKDIR"; exit 1; }
+
+# ---------------- GIT LFS ----------------
+echo "Initializing Git LFS..."
+git lfs install
+
+echo "Tracking large files: ${LFS_EXTENSIONS[*]}"
+for ext in "${LFS_EXTENSIONS[@]}"; do
+    git lfs track "$ext"
+done
+
+# Commit .gitattributes if needed
+if git status --porcelain | grep -qF ".gitattributes"; then
+    git add .gitattributes
+    git commit -m "Track large files with Git LFS" || echo "⚠️ No commit needed"
+fi
+
+# ---------------- REWRITE HISTORY WITH LFS ----------------
+echo "=== MIGRATING HISTORY TO LFS (this rewrites history) ==="
+
+# Build --include from LFS_EXTENSIONS so the two lists cannot drift apart.
+INCLUDE_PATTERNS="$(IFS=,; echo "${LFS_EXTENSIONS[*]}")"
+echo "Running: git lfs migrate import --include=\"$INCLUDE_PATTERNS\" --everything --yes"
+
+# Run directly instead of via eval so the patterns are not re-parsed.
+git lfs migrate import --include="$INCLUDE_PATTERNS" --everything --yes \
+    || { echo "❌ git lfs migrate import failed"; exit 1; }
+
+echo "✔ History rewritten successfully."
+
+# ---------------- GITHUB REMOTE ----------------
+# NOTE: the token is written into the remote URL kept in .git/config.
+if [ -n "$GITHUB_TOKEN" ]; then
+    AUTH_URL="${GITHUB_URL/https:\/\//https://$GITHUB_TOKEN@}"
+else
+    AUTH_URL="$GITHUB_URL"
+fi
+
+# -x: match the remote name exactly, not any name containing "github".
+if ! git remote | grep -qx github; then
+    echo "Adding GitHub remote..."
+    git remote add github "$AUTH_URL"
+else
+    echo "Updating GitHub remote URL..."
+    git remote set-url github "$AUTH_URL"
+fi
+
+# ---------------- FORCE PUSH (because history changed) ----------------
+echo "🚀 Force pushing rewritten history to GitHub..."
+git push github --all --force || { echo "❌ Pushing branches failed"; exit 1; }
+git push github --tags --force || { echo "❌ Pushing tags failed"; exit 1; }
+
+echo
+echo "✅ Done pushing $REPO_NAME to GitHub with rewritten LFS history!"
+echo "🔥 Your GitHub repo is now clean and optimized."
diff --git a/mirror-contribution-graph.py b/mirror-contribution-graph.py
new file mode 100644
index 0000000..ff551b5
--- /dev/null
+++ b/mirror-contribution-graph.py
@@ -0,0 +1,95 @@
+"""Rewrite commit emails in the local bare mirrors and push them to GitHub."""
+
+import subprocess
+import os
+import logging
+from datetime import datetime
+
+# ---------------- CONFIG ----------------
+GITHUB_USER = ""
+GITHUB_EMAIL = ""
+GITHUB_TOKEN = ""
+
+# Folder where mirrored bare repos from Gitea exist
+REPOS_DIR = "./gitea_repos"  # your folder with bare clones
+
+# ---------------- Logging Setup ----------------
+log_filename = f"heatmap_update_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[
+        logging.FileHandler(log_filename),
+        logging.StreamHandler()
+    ]
+)
+
+
+# ---------------- FUNCTIONS ----------------
+def list_local_repos():
+    """Return the sorted names of the bare git repos found in REPOS_DIR.
+
+    A folder counts as a bare repo when it directly contains a HEAD entry.
+    A missing REPOS_DIR yields an empty list.
+    """
+    try:
+        entries = sorted(os.listdir(REPOS_DIR))
+    except FileNotFoundError:
+        return []
+    # exists() is False below a non-directory entry, so no isdir() test.
+    return [
+        entry for entry in entries
+        if os.path.exists(os.path.join(REPOS_DIR, entry, "HEAD"))
+    ]
+
+
+def process_repo(repo_name):
+    """Rewrite the emails in one bare mirror's history and push it to GitHub."""
+    local_path = os.path.join(REPOS_DIR, repo_name)
+    github_url = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{repo_name}.git"
+
+    try:
+        logging.info(f"Fetching latest commits for {repo_name}...")
+        subprocess.run(["git", "--git-dir", local_path, "fetch", "--all"], check=True)
+
+        # Rewrite commits to GitHub email (preserves dates)
+        logging.info(f"Rewriting commits in {repo_name} to GitHub email...")
+        subprocess.run([
+            "git", "--git-dir", local_path, "filter-repo",
+            # The bytes repr() escapes quotes and backslashes, so the address
+            # cannot break out of the callback's bytes literal.
+            "--email-callback", f"return {GITHUB_EMAIL.encode()!r}",
+            "--force"
+        ], check=True)
+
+        # Push to GitHub
+        logging.info(f"Pushing {repo_name} to GitHub...")
+        subprocess.run(["git", "--git-dir", local_path, "push", "--mirror", github_url], check=True)
+
+        logging.info(f"✅ Finished processing {repo_name}")
+
+    except subprocess.CalledProcessError as e:
+        # str(e) embeds the whole command line, token-bearing push URL
+        # included, so log only the exit status.
+        logging.error(f"❌ Error processing {repo_name}: git exited with status {e.returncode}")
+
+
+# ---------------- MAIN ----------------
+def main():
+    """Process every local bare mirror in turn."""
+    logging.info(f"Starting heatmap update for GitHub user: {GITHUB_USER}")
+
+    existing_repos = list_local_repos()
+    if not existing_repos:
+        logging.info(f"No repos found in {REPOS_DIR}. Nothing to process.")
+        return
+
+    for repo_name in existing_repos:
+        logging.info(f"Processing repo: {repo_name}")
+        process_repo(repo_name)
+
+    logging.info(f"All done! Log saved to {log_filename}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mirror-lfs-contribution-graph.py b/mirror-lfs-contribution-graph.py
new file mode 100644
index 0000000..9d3edbc
--- /dev/null
+++ b/mirror-lfs-contribution-graph.py
@@ -0,0 +1,110 @@
+"""Rewrite commit emails in the LFS working clones and push them to GitHub."""
+
+import subprocess
+import os
+import logging
+from datetime import datetime
+
+# ---------------- CONFIG ----------------
+GITHUB_USER = ""
+GITHUB_EMAIL = ""
+GITHUB_TOKEN = ""  # <-- Put your GitHub PAT here
+
+# Folder where working LFS-migrated repos exist
+PROJECTS_DIR = "./git_repo_project_files"
+
+# ---------------- Logging Setup ----------------
+log_filename = f"lfs_heatmap_update_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[
+        logging.FileHandler(log_filename),
+        logging.StreamHandler()
+    ]
+)
+
+
+# ---------------- FUNCTIONS ----------------
+def list_local_repos():
+    """Return the sorted names of the git working clones in PROJECTS_DIR.
+
+    A folder counts when it directly contains a ``.git`` entry. A missing
+    PROJECTS_DIR yields an empty list.
+    """
+    try:
+        entries = sorted(os.listdir(PROJECTS_DIR))
+    except FileNotFoundError:
+        return []
+    # exists() is False below a non-directory entry, so no isdir() test.
+    return [
+        entry for entry in entries
+        if os.path.exists(os.path.join(PROJECTS_DIR, entry, ".git"))
+    ]
+
+
+def process_repo(repo_name):
+    """Rewrite the emails in one working clone's history and push it to GitHub."""
+    local_path = os.path.join(PROJECTS_DIR, repo_name)
+    github_url = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{repo_name}.git"
+
+    try:
+        logging.info(f"Fetching latest commits for {repo_name}...")
+        subprocess.run(["git", "-C", local_path, "fetch", "--all"], check=True)
+
+        # Rewrite commits to GitHub email (preserve dates)
+        logging.info(f"Rewriting commits in {repo_name} to GitHub email...")
+        subprocess.run([
+            "git", "-C", local_path, "filter-repo",
+            # The bytes repr() escapes quotes and backslashes, so the address
+            # cannot break out of the callback's bytes literal.
+            "--email-callback", f"return {GITHUB_EMAIL.encode()!r}",
+            "--force"
+        ], check=True)
+
+        # Ensure 'github' remote exists
+        remotes = subprocess.run(
+            ["git", "-C", local_path, "remote"],
+            capture_output=True, text=True, check=True
+        ).stdout.splitlines()
+        if "github" not in remotes:
+            logging.info(f"Adding 'github' remote for {repo_name}...")
+            subprocess.run(["git", "-C", local_path, "remote", "add", "github", github_url], check=True)
+        else:
+            logging.info(f"Updating 'github' remote URL for {repo_name}...")
+            subprocess.run(["git", "-C", local_path, "remote", "set-url", "github", github_url], check=True)
+
+        # Push branches and tags only. "--mirror" would also publish this
+        # clone's refs/remotes/* and delete GitHub refs that are absent here.
+        # History was rewritten, hence --force.
+        logging.info(f"Pushing all branches and tags of {repo_name} to GitHub...")
+        subprocess.run(["git", "-C", local_path, "push", "--force", "--all", "github"], check=True)
+        subprocess.run(["git", "-C", local_path, "push", "--force", "--tags", "github"], check=True)
+
+        logging.info(f"✅ Finished processing {repo_name}")
+
+    except subprocess.CalledProcessError as e:
+        # str(e) embeds the whole command line, which can include the
+        # token-bearing remote URL, so log only the exit status.
+        logging.error(f"❌ Error processing {repo_name}: git exited with status {e.returncode}")
+
+
+# ---------------- MAIN ----------------
+def main():
+    """Process every local working clone in turn."""
+    logging.info(f"Starting LFS heatmap update for GitHub user: {GITHUB_USER}")
+
+    existing_repos = list_local_repos()
+    if not existing_repos:
+        logging.info(f"No repos found in {PROJECTS_DIR}. Nothing to process.")
+        return
+
+    for repo_name in existing_repos:
+        logging.info(f"Processing repo: {repo_name}")
+        process_repo(repo_name)
+
+    logging.info(f"All done! Log saved to {log_filename}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a837ebd
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+requests>=2.31.0
+git-filter-repo
\ No newline at end of file