image detection not working

This commit is contained in:
2026-01-08 18:00:24 +01:00
parent a7fd2af552
commit 61c58e632d

View File

@@ -3,7 +3,6 @@ import requests
import re import re
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import subprocess import subprocess
import time
import logging import logging
# Configure logging # Configure logging
@@ -51,10 +50,10 @@ def redmine_get_wiki_pages():
try: try:
page_response = requests.get(page_url, headers=headers) page_response = requests.get(page_url, headers=headers)
page_response.raise_for_status() page_response.raise_for_status()
page_soup = BeautifulSoup(page_response.content, "xml") page_soup = BeautifulSoup(page_response.content, "xml")
content = page_soup.find("text").text if page_soup.find("text") else "" content = page_soup.find("text").text if page_soup.find("text") else ""
pages.append({"title": title, "parent": parent, "content": content}) pages.append({"title": title, "parent": parent, "content": content})
logger.info(f"Fetched content for: {title}") logger.info(f"Fetched content for: {title}")
except Exception as e: except Exception as e:
@@ -70,18 +69,18 @@ def redmine_markup_to_markdown(content, pages):
# Convert links - handle both [[page]] and [page] syntax # Convert links - handle both [[page]] and [page] syntax
def replace_link(match): def replace_link(match):
link = match.group(1) link = match.group(1)
if link in pages: # Ensure the linked page exists if link in pages: # Ensure the linked page exists
# Create a relative path based on parent-child hierarchy # Create a relative path based on parent-child hierarchy
href = f"{pages[link]['title'].replace(' ', '_')}.md" href = f"{pages[link]['title'].replace(' ', '_')}.md"
return f"[{link}]({href})" return f"[{link}]({href})"
else: else:
logger.warning(f"Linked page not found: {link}") logger.warning(f"Linked page not found: {link}")
# Return original link if linked page not found # Return original link if linked page not found
return match.group() return match.group()
# Replace [[page]] with [page](page_url) # Replace [[page]] with [page](page_url)
content = re.sub(r'\[\[(.*?)\]\]', replace_link, content) content = re.sub(r'\[\[(.*?)\]\]', replace_link, content)
@@ -107,7 +106,7 @@ def redmine_markup_to_markdown(content, pages):
# Convert code blocks # Convert code blocks
content = re.sub(r'<pre>(.*?)</pre>', r'```python\n\1\n```', content, flags=re.DOTALL) content = re.sub(r'<pre>(.*?)</pre>', r'```python\n\1\n```', content, flags=re.DOTALL)
# Convert inline code # Convert inline code
content = re.sub(r'\{\{(.*?)\}\}', r'`\1`', content) content = re.sub(r'\{\{(.*?)\}\}', r'`\1`', content)
@@ -125,17 +124,17 @@ def download_image(image_url, local_dir):
headers = {"X-Redmine-API-Key": REDMINE_API_KEY} headers = {"X-Redmine-API-Key": REDMINE_API_KEY}
response = requests.get(f"{REDMINE_URL}{image_url}", headers=headers) response = requests.get(f"{REDMINE_URL}{image_url}", headers=headers)
response.raise_for_status() response.raise_for_status()
# Extract filename from URL # Extract filename from URL
filename = os.path.basename(image_url) filename = os.path.basename(image_url)
filepath = os.path.join(local_dir, "images", filename) filepath = os.path.join(local_dir, "images", filename)
# Create images directory if it doesn't exist # Create images directory if it doesn't exist
os.makedirs(os.path.dirname(filepath), exist_ok=True) os.makedirs(os.path.dirname(filepath), exist_ok=True)
with open(filepath, 'wb') as f: with open(filepath, 'wb') as f:
f.write(response.content) f.write(response.content)
logger.info(f"Downloaded image: {filename}") logger.info(f"Downloaded image: {filename}")
return f"./images/{filename}" return f"./images/{filename}"
except Exception as e: except Exception as e:
@@ -168,7 +167,7 @@ def push_to_gitea_wiki(local_dir, pages):
elif os.path.isdir(item_path): elif os.path.isdir(item_path):
import shutil import shutil
shutil.rmtree(item_path) shutil.rmtree(item_path)
# Create pages as .md files # Create pages as .md files
for page in pages: for page in pages:
title = page['title'] title = page['title']
@@ -178,7 +177,7 @@ def push_to_gitea_wiki(local_dir, pages):
filename = "Home.md" filename = "Home.md"
else: else:
filename = f"{title.replace(' ', '_')}.md" filename = f"{title.replace(' ', '_')}.md"
filepath = os.path.join(local_dir, filename) filepath = os.path.join(local_dir, filename)
if parent and parent in page_map: if parent and parent in page_map:
@@ -190,70 +189,84 @@ def push_to_gitea_wiki(local_dir, pages):
# Parent directory exists, create subdirectory for this page # Parent directory exists, create subdirectory for this page
page_subdir = os.path.dirname(filepath) page_subdir = os.path.dirname(filepath)
os.makedirs(page_subdir, exist_ok=True) os.makedirs(page_subdir, exist_ok=True)
else: else:
# This is a root level page, no need to create subdirectories # This is a root level page, no need to create subdirectories
pass pass
# Convert content to Markdown and handle links # Convert content to Markdown and handle links
markdown_content = redmine_markup_to_markdown(page['content'], page_map) markdown_content = redmine_markup_to_markdown(page['content'], page_map)
# Find all images in the content to log them # Find all images in the content to log them
image_pattern = r'!(.*?)!' image_pattern = r'!(.*?)!'
found_images = re.findall(image_pattern, markdown_content) found_images = re.findall(image_pattern, markdown_content)
if found_images: if found_images:
logger.info(f"Found images on page '{title}': {found_images}") logger.info(f"Found images on page '{title}': {found_images}")
# Download images referenced in the content # Process all !image! patterns in the content
# First, we need to find all image URLs and download them original_content = markdown_content
# Look for Redmine-style image tags like !image.png!
def replace_image(match): # Find all image tags in the format !image_name!
image_name = match.group(1) image_tags = re.findall(r'!(.*?)!', original_content)
# Try to find the actual image URL by searching for it in the page content # For each image tag, attempt to download it and replace it with Markdown syntax
# This is a simplified approach - we'll assume the image name matches for image_tag in image_tags:
# what's found in Redmine's image system if image_tag and not image_tag.startswith('\\'):
if image_name: logger.info(f"Found image tag: {image_tag}")
# In Redmine, images are usually at /attachments/... path # Replace the specific tag with our download logic
image_url = f"/attachments/download/{image_name}" markdown_content = markdown_content.replace(
logger.info(f"Attempting to download image: {image_name}") f"!{image_tag}!",
handle_image_tag(image_tag, local_dir)
downloaded_path = download_image(image_url, local_dir) )
if downloaded_path:
return f"![]({downloaded_path})"
else:
# If download fails, keep the original format
return f"!{image_name}!"
return match.group(0)
# We need to handle both !image.png! and ![alt](image) formats
# For now, we'll process !image.png! patterns specifically
markdown_content = re.sub(image_pattern, replace_image, markdown_content)
# Write to file # Write to file
with open(filepath, 'w', encoding='utf-8') as f: with open(filepath, 'w', encoding='utf-8') as f:
f.write(f"# {title}\n\n") f.write(f"# {title}\n\n")
f.write(markdown_content) f.write(markdown_content)
logger.info(f"Saved page: {filename}") logger.info(f"Saved page: {filename}")
# Add, commit, and push # Add, commit, and push
os.chdir(local_dir) os.chdir(local_dir)
subprocess.run(["git", "add", "."], check=True) subprocess.run(["git", "add", "."], check=True)
subprocess.run(["git", "commit", "-m", "'Import Redmine wiki pages'"], check=False) subprocess.run(["git", "commit", "-m", "'Import Redmine wiki pages'"], check=False)
subprocess.run(["git", "push", "origin", "main"], check=True) subprocess.run(["git", "push", "origin", "main"], check=True)
logger.info("✅ Wiki pages pushed to Gitea!") logger.info("✅ Wiki pages pushed to Gitea!")
def handle_image_tag(image_name, local_dir):
    """Convert one Redmine ``!image!`` tag into a Markdown image reference.

    Args:
        image_name: Text captured between the two ``!`` delimiters.
        local_dir: Directory passed through to ``download_image``.

    Returns:
        ``![](<path>)`` using the path returned by ``download_image`` when the
        download succeeds; otherwise the tag in its original ``!name!`` form so
        the surrounding text is left unchanged.
    """
    valid_extensions = ('.png', '.jpg', '.jpeg')

    # Guard first: an empty/None capture or a backslash-prefixed capture is not
    # an image reference. Doing this before touching the string avoids calling
    # .lower() on None and avoids a misleading "invalid extension" warning.
    if not image_name or image_name.startswith('\\'):
        return f"!{image_name or ''}!"

    original_tag = f"!{image_name}!"

    # str.endswith accepts a tuple, so one call covers every allowed suffix.
    if not image_name.lower().endswith(valid_extensions):
        logger.warning(f"Invalid image extension in tag: {image_name}")
        return original_tag

    # NOTE(review): this assumes the attachment is reachable by file name
    # alone; Redmine download URLs may also require the numeric attachment
    # id -- verify against the target instance.
    image_url = f"/attachments/download/{image_name}"
    logger.info(f"Attempting to download image: {image_name}")

    downloaded_path = download_image(image_url, local_dir)
    if not downloaded_path:
        # Download failed: keep the original tag rather than emit a dead link.
        return original_tag
    return f"![]({downloaded_path})"
def main(): def main():
# Step 1: Get wiki pages from Redmine # Step 1: Get wiki pages from Redmine
logger.info("Fetching Redmine wiki pages...") logger.info("Fetching Redmine wiki pages...")
pages = redmine_get_wiki_pages() pages = redmine_get_wiki_pages()
# Step 2: Prepare local directory for Gitea wiki # Step 2: Prepare local directory for Gitea wiki
local_wiki_dir = "gitea_wiki_clone" local_wiki_dir = "gitea_wiki_clone"
clone_or_init_wiki_repo(GITEA_WIKI_URL, local_wiki_dir) clone_or_init_wiki_repo(GITEA_WIKI_URL, local_wiki_dir)
# Step 3: Push pages to Gitea # Step 3: Push pages to Gitea
push_to_gitea_wiki(local_wiki_dir, pages) push_to_gitea_wiki(local_wiki_dir, pages)