# 275 lines · 10 KiB · Python
import logging
import os
import re
import shutil
import subprocess
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
|
|
|
|
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# === Configuration ===
# Every setting can be overridden through the environment variable named in the
# corresponding os.environ.get() call; the literals are fallbacks that keep the
# script working exactly as before when no variable is set.

# Base URL of the Redmine instance (e.g., https://redmine.example.com).
# The trailing slash is removed because request paths are appended as "/projects/...".
REDMINE_URL = os.environ.get("REDMINE_URL", "https://redmine.maketank.net").rstrip("/")

# SECURITY: this API key is stored in plain text in the source file. Rotate it,
# supply the new key through REDMINE_API_KEY, and then delete this fallback.
REDMINE_API_KEY = os.environ.get(
    "REDMINE_API_KEY", "62120b7ed6bf84023b2a1d7d5a265cf172f8d607"
)

# Identifier of the Redmine project whose wiki is exported (e.g., "my-project").
PROJECT_ID = os.environ.get("REDMINE_PROJECT_ID", "bastelstube")

# Git URL of the Gitea wiki repository. The SSH form is used, so this file does
# not need to hold a Gitea user name or access token.
GITEA_WIKI_URL = os.environ.get(
    "GITEA_WIKI_URL",
    "ssh://git@git.maketank.net:2222/chaos/spielplatz.wiki.git",
)
|
|
|
|
# === Functions ===
|
|
|
|
def redmine_get_wiki_pages():
    """Fetch every wiki page of the configured Redmine project.

    The wiki index is read first to learn each page's title and parent; the
    text of every page is then fetched with one request per page.

    Returns:
        A list with one dict per page, holding the keys ``title``, ``parent``
        (title of the parent page, or ``None`` for root-level pages) and
        ``content`` (the page text, or an empty string when the page could not
        be fetched).

    Raises:
        requests.RequestException: If the wiki index itself cannot be fetched.
    """
    headers = {"X-Redmine-API-Key": REDMINE_API_KEY}
    # requests has no default timeout; without one a stalled server blocks forever.
    timeout = 30

    index_url = f"{REDMINE_URL}/projects/{PROJECT_ID}/wiki/index.xml"
    response = requests.get(index_url, headers=headers, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "xml")
    pages = []

    for page in soup.find_all("wiki_page"):
        title_node = page.find("title")
        if title_node is None:
            logger.warning("Skipping wiki index entry without a title")
            continue
        title = title_node.text

        # Root-level pages carry no <parent> element.
        parent_node = page.find("parent")
        parent = parent_node.get("title") if parent_node is not None else None

        logger.info(f"Found page: {title} (Parent: {parent})")

        # Percent-encode the title so characters such as '?', '#' or '%'
        # cannot alter the structure of the request URL.
        encoded_title = quote(title, safe="")
        page_url = f"{REDMINE_URL}/projects/{PROJECT_ID}/wiki/{encoded_title}.xml"

        content = ""
        try:
            page_response = requests.get(page_url, headers=headers, timeout=timeout)
            page_response.raise_for_status()
        except requests.RequestException as e:
            # Best effort: keep the page in the result with empty content.
            logger.warning(f"Failed to fetch content for page '{title}': {e}")
        else:
            text_node = BeautifulSoup(page_response.content, "xml").find("text")
            if text_node is not None:
                content = text_node.text
            logger.info(f"Fetched content for: {title}")

        pages.append({"title": title, "parent": parent, "content": content})

    return pages
|
|
|
|
def redmine_markup_to_markdown(content, pages):
    """Convert Redmine (Textile-style) wiki markup to Markdown.

    Args:
        content: Raw wiki text of a single page.
        pages: Mapping of page title to page dict (each dict has a ``title``
            key). It is used to resolve ``[[wiki links]]``.

    Returns:
        The converted Markdown text. Image tags of the form ``!name.png!`` and
        existing Markdown images are left untouched, so that
        push_to_gitea_wiki can still find the tags and replace them after
        downloading the files. Links to unknown pages are also left as-is.
    """
    if not content:
        return ""

    # NUL bytes serve as placeholder delimiters below; make sure none pre-exist.
    content = content.replace("\x00", "")

    # Lift <pre> blocks out first so that none of the conversions below touch
    # code, then put them back as fenced blocks at the very end.
    code_blocks = []

    def stash_code_block(match):
        language = match.group(1) or ""
        code = match.group(2).strip("\n")
        code_blocks.append(f"```{language}\n{code}\n```")
        return f"\x00{len(code_blocks) - 1}\x00"

    content = re.sub(
        r'<pre>(?:\s*<code(?:\s+class="([^"]*)")?\s*>)?(.*?)(?:</code>\s*)?</pre>',
        stash_code_block,
        content,
        flags=re.DOTALL | re.IGNORECASE,
    )

    # Convert [[Page]] and [[Page|Label]] links.
    def replace_link(match):
        link = match.group(1)
        target, _, label = link.partition('|')
        target = target.strip()
        label = label.strip() or target

        # Accept the title as written or with spaces turned into underscores.
        candidates = (target, target.replace(' ', '_'))
        key = next((name for name in candidates if name in pages), None)
        if key is None:
            logger.warning(f"Linked page not found: {link}")
            # Return original link if linked page not found
            return match.group()

        title = pages[key]['title']
        # All pages live flat in the repo root; 'Wiki' is written as Home.md
        # by push_to_gitea_wiki, so links to it must point there.
        href = "Home.md" if title == 'Wiki' else f"{title.replace(' ', '_')}.md"
        return f"[{label}]({href})"

    content = re.sub(r'\[\[(.*?)\]\]', replace_link, content)

    # Convert lists. This runs before the heading conversion so the "# Title"
    # lines produced there are not mistaken for ordered-list items.
    # "[ \t]*" instead of "\s*" keeps blank lines in front of a list intact.
    def replace_list_item(match):
        markers, text = match.group(1), match.group(2)
        indent = "    " * (len(markers) - 1)  # "**" / "##" mark nested items
        bullet = "-" if markers[0] == "*" else "1."
        return f"{indent}{bullet} {text}"

    content = re.sub(
        r'^[ \t]*(\*+|#+) (.*)$', replace_list_item, content, flags=re.MULTILINE
    )

    # Convert headings h1. .. h6.
    content = re.sub(
        r'^h([1-6])\.[ \t]*(.*)$',
        lambda m: f"{'#' * int(m.group(1))} {m.group(2)}",
        content,
        flags=re.MULTILINE,
    )

    # Handle textile-specific elements like blockquotes
    content = re.sub(r'^bq\.[ \t]*(.*)$', r'> \1', content, flags=re.MULTILINE)

    # Convert bold: *text* becomes **text**. Text that is already wrapped in
    # double asterisks, and asterisks between word characters, are not touched.
    content = re.sub(
        r'(?<![*\w])\*(?![\s*])(.+?)(?<![\s*])\*(?![*\w])', r'**\1**', content
    )

    # Convert inline code
    content = re.sub(r'\{\{(.*?)\}\}', r'`\1`', content)

    # Remove HTML comments and tags. A tag must start with a letter, so plain
    # text such as "a < b and c > d" survives.
    content = re.sub(
        r'<!--.*?-->|</?[A-Za-z][^<>\x00]*>', '', content, flags=re.DOTALL
    )

    # Restore the protected code blocks.
    return re.sub(
        r'\x00(\d+)\x00', lambda m: code_blocks[int(m.group(1))], content
    )
|
|
|
|
def download_image(image_url, local_dir):
    """Download an image from Redmine and save it under ``<local_dir>/images``.

    Args:
        image_url: Path of the image on the Redmine server; it is appended to
            REDMINE_URL to form the request URL.
        local_dir: Directory in which the ``images`` folder is created.

    Returns:
        The relative path ``./images/<filename>`` on success, or ``None`` when
        the download or the write to disk fails (a warning is logged).
    """
    headers = {"X-Redmine-API-Key": REDMINE_API_KEY}

    # Only the last path component is used as the local file name.
    filename = os.path.basename(image_url)
    filepath = os.path.join(local_dir, "images", filename)

    try:
        # requests has no default timeout; a stalled server would block forever.
        response = requests.get(
            f"{REDMINE_URL}{image_url}", headers=headers, timeout=30
        )
        response.raise_for_status()

        # Create images directory if it doesn't exist
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        with open(filepath, 'wb') as f:
            f.write(response.content)
    except (requests.RequestException, OSError) as e:
        logger.warning(f"Failed to download image from {image_url}: {e}")
        return None

    logger.info(f"Downloaded image: {filename}")
    return f"./images/{filename}"
|
|
|
|
def clone_or_init_wiki_repo(repo_url, local_dir):
    """Clone the Gitea wiki repository, or update an existing local clone.

    Args:
        repo_url: Git URL of the wiki repository.
        local_dir: Path of the local clone. It is cloned into when the path
            does not exist yet; otherwise ``git pull`` is run inside it.

    Returns:
        ``local_dir``, unchanged.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` or ``git pull`` fails.
    """
    if not os.path.exists(local_dir):
        logger.info(f"Cloning Gitea wiki repository: {repo_url}")
        subprocess.run(["git", "clone", repo_url, local_dir], check=True)
    else:
        logger.info(f"Using existing local wiki repo: {local_dir}")
        # Run git inside the clone via cwd= instead of os.chdir(): the working
        # directory of this process stays untouched, also for nested or
        # absolute paths and when the pull fails.
        subprocess.run(["git", "pull"], cwd=local_dir, check=True)
    return local_dir
|
|
|
|
def _wiki_filename(title):
    """Return the Markdown file name for a page title ('Wiki' maps to Home.md)."""
    if title == 'Wiki':
        return "Home.md"
    return f"{title.replace(' ', '_')}.md"


def _clear_worktree(local_dir):
    """Delete every entry of local_dir except the .git folder."""
    for item in os.listdir(local_dir):
        if item == '.git':
            continue
        item_path = os.path.join(local_dir, item)
        if os.path.isdir(item_path) and not os.path.islink(item_path):
            shutil.rmtree(item_path)
        elif os.path.lexists(item_path):
            os.remove(item_path)


def _localize_images(markdown_content, local_dir):
    """Replace each !name! tag with the result of handle_image_tag for it."""
    replacements = {}  # one handle_image_tag call per distinct tag

    def swap(match):
        image_tag = match.group(1)
        # Empty and backslash-escaped captures are not image tags.
        if not image_tag or image_tag.startswith('\\'):
            return match.group(0)
        if image_tag not in replacements:
            logger.info(f"Found image tag: {image_tag}")
            replacements[image_tag] = handle_image_tag(image_tag, local_dir)
        return replacements[image_tag]

    return re.sub(r'!(.*?)!', swap, markdown_content)


def push_to_gitea_wiki(local_dir, pages):
    """Write all pages as Markdown files into the clone and push them to Gitea.

    Everything in ``local_dir`` except ``.git`` is deleted first, so the
    repository afterwards contains exactly the imported pages. All files are
    written flat into ``local_dir``; the page titled 'Wiki' becomes Home.md.
    Git is run with ``cwd=local_dir``, so the working directory of this
    process is not changed.

    Args:
        local_dir: Path of the local clone of the wiki repository.
        pages: List of page dicts with the keys ``title`` and ``content``.

    Raises:
        subprocess.CalledProcessError: If ``git add`` or ``git push`` fails.
    """
    page_map = {page['title']: page for page in pages}  # Map titles to their data
    logger.info(f"We are in: {os.getcwd()}")

    # Remove all existing files in the repo (except .git folder)
    _clear_worktree(local_dir)

    # Create pages as .md files
    for page in pages:
        title = page['title']
        filename = _wiki_filename(title)
        filepath = os.path.join(local_dir, filename)

        # Convert content to Markdown and handle links
        markdown_content = redmine_markup_to_markdown(page['content'], page_map)

        found_images = re.findall(r'!(.*?)!', markdown_content)
        if found_images:
            logger.info(f"Found images on page '{title}': {found_images}")
        markdown_content = _localize_images(markdown_content, local_dir)

        # Write to file
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(f"# {title}\n\n")
            f.write(markdown_content)

        logger.info(f"Saved page: {filename}")

    # Add, commit, and push
    subprocess.run(["git", "add", "."], cwd=local_dir, check=True)
    # The arguments are not parsed by a shell, so the message must not carry
    # its own quotes. check=False: "nothing to commit" exits non-zero and is
    # not an error here.
    subprocess.run(
        ["git", "commit", "-m", "Import Redmine wiki pages"],
        cwd=local_dir,
        check=False,
    )
    # Push whatever branch the clone has checked out instead of assuming "main".
    subprocess.run(["git", "push", "origin", "HEAD"], cwd=local_dir, check=True)

    logger.info("✅ Wiki pages pushed to Gitea!")
|
|
|
|
def handle_image_tag(image_name, local_dir):
    """Turn the inside of a ``!image!`` tag into a Markdown image reference.

    Args:
        image_name: Text found between the two exclamation marks.
        local_dir: Directory of the local wiki clone; passed on to
            download_image.

    Returns:
        ``![<image_name>](<path>)`` with the path returned by download_image
        when the download succeeds. In every other case (extension other than
        .png/.jpg/.jpeg, name starting with a backslash, failed download) the
        original ``!<image_name>!`` text is returned.
    """
    original_tag = f"!{image_name}!"

    # The caller's regex also captures ordinary text between two exclamation
    # marks; the extension check filters those captures out.
    valid_extensions = ('.png', '.jpg', '.jpeg')
    if not image_name.lower().endswith(valid_extensions):
        logger.warning(f"Invalid image extension in tag: {image_name}")
        return original_tag

    if image_name.startswith('\\'):
        # This is likely a regex capture group that wasn't meant to be an image
        return original_tag

    # NOTE(review): Redmine normally serves attachments under
    # /attachments/download/<id>/<filename>; confirm that this name-only path
    # resolves on the target server.
    image_url = f"/attachments/download/{image_name}"
    logger.info(f"Attempting to download image: {image_name}")

    downloaded_path = download_image(image_url, local_dir)
    if not downloaded_path:
        # If download fails, keep the original format
        return original_tag

    return f"![{image_name}]({downloaded_path})"
|
|
|
|
def main():
    """Run the migration: fetch from Redmine, sync the local clone, push to Gitea."""
    # Name of the directory that holds the local clone of the Gitea wiki.
    clone_dir = "gitea_wiki_clone"

    # 1) Read all wiki pages from Redmine.
    logger.info("Fetching Redmine wiki pages...")
    wiki_pages = redmine_get_wiki_pages()

    # 2) Clone the Gitea wiki repository, or update the existing clone.
    clone_or_init_wiki_repo(GITEA_WIKI_URL, clone_dir)

    # 3) Write the pages into the clone and push them.
    push_to_gitea_wiki(clone_dir, wiki_pages)


# Run the migration only when the file is executed as a script.
if __name__ == "__main__":
    main()
|