diff --git a/tasks/archiving/__init__.py b/tasks/archiving/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tasks/archiving/config.py b/tasks/archiving/config.py
new file mode 100644
index 00000000..8dd88b54
--- /dev/null
+++ b/tasks/archiving/config.py
@@ -0,0 +1,18 @@
+from typing import Dict, List, Union
+
+# Define constants for configuration keys
+TEMPLATE_NAME_KEY = 'template_name_with_namespace'
+ARCHIVING_TEMPLATE_KEY = 'automated_archiving_template'
+SECTION_TYPE_KEY = 'section_type'
+SKIP_TEMPLATES_KEY = 'skip_templates'
+
+# Define the type for configuration values
+ConfigValue = Union[str, int, bool, List[str]]  # Extend as needed
+
+# Define the configuration dictionary
+USER_CONFIG: Dict[str, ConfigValue] = {
+    TEMPLATE_NAME_KEY: 'قالب:أرشيف_آلي',
+    ARCHIVING_TEMPLATE_KEY: 'أرشفة آلية',
+    SECTION_TYPE_KEY: ['حجم', 'قسم'],  # Supported modes: by size ('حجم') or by section age ('قسم')
+    SKIP_TEMPLATES_KEY: ['رشف', 'آخر'],  # Templates that mark content as non-archivable
+}
diff --git a/tasks/archiving/core/__init__.py b/tasks/archiving/core/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tasks/archiving/core/archiver.py b/tasks/archiving/core/archiver.py
new file mode 100644
index 00000000..3f686e3b
--- /dev/null
+++ b/tasks/archiving/core/archiver.py
@@ -0,0 +1,196 @@
+import hashlib
+from datetime import datetime
+
+import pywikibot
+import wikitextparser as wtp
+
+from core.utils.helpers import prepare_str
+
+
+class Options:
+    def __init__(self, page: pywikibot.Page, template_name: str = "أرشفة آلية"):
+        """
+        Initializes the object with the given `page` and `template_name`.
+
+        Parameters:
+            page (pywikibot.Page): The page object.
+            template_name (str, optional): The name of the template. Defaults to "أرشفة آلية".
+        """
+        self.template_name = template_name
+        self.page = page
+        # Default: archive by section age ('قسم'), 3-day threshold, no explicit target
+        self.option = ('قسم', '3', None)
+        self._get_params()
+
+    def _get_template(self):
+        """
+        Retrieves the template with the specified name from the page's wikitext.
+
+        Returns:
+            wtp.Template or None: The template object if found, None otherwise.
+        """
+        text = self.page.get()
+        templates = wtp.parse(text).templates
+        for t in templates:
+            if t.name.strip() == self.template_name:
+                return t
+        return None
+
+    def _get_params(self):
+        """
+        Reads the template's parameters into `self.option`.
+
+        The default is kept when the template is missing or does not carry
+        exactly three arguments.
+        """
+        template = self._get_template()
+        if template is None:
+            return
+
+        arguments = template.arguments
+        if len(arguments) == 3:
+            self.option = (arguments[0].value, arguments[1].value, arguments[2].value)
+
+
+class Section:
+    def __init__(self, title, content):
+        self.title = title.strip()
+        self.content = content
+        self.id = self._generate_id()
+        self.skip = False
+        self.skip_templates = [prepare_str("لا للأرشفة")]
+        self._skip()
+
+    def _generate_id(self):
+        # Hash the section body so the id changes whenever the content changes
+        content_hash = hashlib.sha1(self.content.encode('utf-8', 'ignore')).hexdigest()
+        return f"{prepare_str(self.title)}_{content_hash}"
+
+    def _skip(self):
+        # Mark the section as non-archivable if it carries a skip template
+        parsed = wtp.parse(self.content)
+        for template in parsed.templates:
+            if prepare_str(template.normal_name()) in self.skip_templates:
+                self.skip = True
+                break
+
+
+class Archiver:
+    def __init__(self, page: pywikibot.Page):
+        """
+        Initializes an Archiver object.
+
+        Args:
+            page (pywikibot.Page): The page to be edited.
+        """
+        # The talk page to be archived
+        self.talk_page = page
+        self.options = Options(self.talk_page).option
+
+    def archive_talk_page(self):
+        """
+        Archives sections of the talk page that have passed the configured age threshold.
+        """
+        text = self.talk_page.get()
+        # Header templates (e.g. {{رشف}}); currently unused until saving is enabled
+        header = self._extract_header(text)
+        current_time = datetime.utcnow()
+        archive_text = ''
+        remaining_text = ''
+
+        sections = self._split_sections(text)
+        last_comment_timestamps = self.get_last_comment_timestamps()
+
+        for section_title, section_content in sections:
+            section = Section(section_title, section_content)
+
+            # Sections carrying a skip template are never archived.
+            # Note: section_content already includes the heading line.
+            if section.skip:
+                remaining_text += section_content
+                continue
+
+            if section.id in last_comment_timestamps:
+                last_comment_time = last_comment_timestamps[section.id]
+                if (current_time - last_comment_time).days > int(self.options[1]):
+                    archive_text += section_content
+                else:
+                    remaining_text += section_content
+            else:
+                remaining_text += section_content
+
+        # In size mode, only archive once the page exceeds the threshold (in KB)
+        if self.options[0] != 'قسم':
+            if len(self.talk_page.text) < int(self.options[1]) * 1000:
+                archive_text = ''
+
+        if archive_text:
+            print("Sections ready to archive (saving is still disabled).")
+            # archive_page = pywikibot.Page(self.site, f'{ARCHIVE_PAGE_PREFIX}{current_time.strftime("%Y-%m")}')
+            # archive_page.text += archive_text
+            # archive_page.save(summary='Archiving old discussions')
+            #
+            # self.talk_page.text = remaining_text
+            # self.talk_page.save(summary='Archiving old discussions')
+        else:
+            print("No sections to archive.")
+
+    def get_last_comment_timestamps(self):
+        history = self.talk_page.revisions(reverse=False, total=20, content=True)  # Fetch the last 20 revisions
+        section_last_edit = {}
+        seen_sections = set()
+
+        for revision in history:
+            try:
+                timestamp = revision.timestamp
+                content = revision.text
+
+                sections = self._split_sections(content)
+                current_sections = set()
+
+                for section_title, section_content in sections:
+                    section = Section(section_title, section_content)
+                    current_sections.add(section.id)
+
+                    # The id embeds a content hash, so the oldest revision carrying
+                    # this exact content marks the section's last change
+                    if section.id not in section_last_edit:
+                        section_last_edit[section.id] = timestamp
+                    else:
+                        section_last_edit[section.id] = min(section_last_edit[section.id], timestamp)
+
+                # Sections seen in a newer revision but absent here were created later
+                removed_sections = seen_sections - current_sections
+                for section_id in removed_sections:
+                    if section_id not in section_last_edit:
+                        section_last_edit[section_id] = timestamp
+
+                seen_sections = current_sections
+            except Exception as e:
+                print(f"Error processing revision {revision.revid}: {e}")
+
+        return section_last_edit
+
+    def _split_sections(self, text):
+        parsed = wtp.parse(text)
+        # Keep only level-2 sections; section.string includes the heading line
+        return [(section.title, section.string) for section in parsed.sections if section.level == 2]
+
+    def _extract_header(self, text):
+        parsed = wtp.parse(text)
+        templates = parsed.templates
+
+        spans = []
+        for template in templates:
+            if template.name.strip() == 'رشف':
+                spans.append(template.span[0])
+                spans.append(template.span[1])
+        if len(spans) <= 1:
+            return ""
+        # Return the slice of text covering all {{رشف}} templates
+        return text[spans[0]:spans[-1]]
+
+
+# TODO: create a class to archive sections; customize the archive summary.
+if __name__ == "__main__":
+    site = pywikibot.Site('ar', 'wikipedia')
+    page_name = "نقاش_المستخدم:لوقا"
+    page = pywikibot.Page(site, page_name)
+    archive_obj = Archiver(page)
+    archive_obj.archive_talk_page()
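To make the `Options` contract concrete: the class expects `{{أرشفة آلية}}` to carry exactly three positional parameters (mode, age/size threshold, and a third value this diff does not yet consume). A minimal sketch using wikitextparser alone; the wikitext and parameter values below are invented for illustration:

```python
import wikitextparser as wtp

# Invented talk-page wikitext: mode ('قسم' = by section age), a 3-day
# threshold, and a third positional parameter (unused by the archiver so far)
wikitext = "{{أرشفة آلية|قسم|3|أرشيف 1}}\n== نقاش قديم ==\nنص النقاش."

template = next(t for t in wtp.parse(wikitext).templates
                if t.name.strip() == "أرشفة آلية")
mode, threshold, target = (arg.value for arg in template.arguments)
print((mode, threshold, target))  # ('قسم', '3', 'أرشيف 1')
```

The `name.strip()` comparison mirrors `_get_template` above: wikitextparser keeps any whitespace around template names, so stripping is what makes the lookup tolerant.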
diff --git a/tasks/archiving/core/bot.py b/tasks/archiving/core/bot.py
new file mode 100644
index 00000000..ee400ae4
--- /dev/null
+++ b/tasks/archiving/core/bot.py
@@ -0,0 +1,98 @@
+import logging
+from abc import ABC, abstractmethod
+from typing import Callable, Dict, List
+
+
+# Define the Job interface
+class Job(ABC):
+    @abstractmethod
+    def perform(self, item):
+        """
+        Perform an action on the given item.
+
+        :param item: The item to process, e.g., a page or a file
+        """
+        pass
+
+
+# Implement concrete strategies for different jobs
+class ActionJob(Job):
+    def perform(self, page):
+        # Placeholder: implement the specific action here
+        print(f"Performing action on page: {page.title()}")
+        logging.info(f"Performing action on page: {page.title()}")
+
+
+# Define a HookManager for dynamic hooks
+class HookManager:
+    def __init__(self):
+        self.hooks: Dict[str, List[Callable]] = {
+            'before': [],
+            'after': [],
+        }
+
+    def add_hook(self, point: str, hook: Callable):
+        if point in self.hooks:
+            self.hooks[point].append(hook)
+
+    def remove_hook(self, point: str, hook: Callable):
+        if point in self.hooks:
+            self.hooks[point].remove(hook)
+
+    def run_hooks(self, point: str, item):
+        for hook in self.hooks.get(point, []):
+            hook(item)
+
+
+# Define the CompositeJob class to handle multiple jobs
+class CompositeJob(Job):
+    def __init__(self, hook_manager: HookManager):
+        self.jobs: List[Job] = []
+        self.hook_manager = hook_manager
+
+    def add_job(self, job: Job):
+        self.jobs.append(job)
+
+    def perform(self, item):
+        # Run before hooks
+        self.hook_manager.run_hooks('before', item)
+
+        # Execute the main jobs
+        for job in self.jobs:
+            job.perform(item)
+
+        # Run after hooks
+        self.hook_manager.run_hooks('after', item)
+
+
+# Define the abstract Processor class
+class Processor(ABC):
+    def __init__(self, job: Job):
+        self.job = job  # Dependency injection of the Job strategy
+
+    @abstractmethod
+    def get_items(self):
+        """
+        Retrieve the items to be processed.
+
+        :return: A list of items to process
+        """
+        pass
+
+    def process_items(self):
+        items = self.get_items()
+        for item in items:
+            self.job.perform(item)  # Delegate the action to the injected Job strategy
+
+
+# Example hook functions
+def before_hook(item):
+    print(f"Before processing item: {item.title()}")
+
+
+def after_hook(item):
+    print(f"After processing item: {item.title()}")
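The strategy/composite/hook wiring in bot.py can be exercised without pywikibot or network access. A minimal sketch, assuming the `tasks.archiving` package is on the import path; `FakePage` is a made-up stand-in for `pywikibot.Page`:

```python
from tasks.archiving.core.bot import ActionJob, CompositeJob, HookManager


class FakePage:
    """Made-up stand-in for pywikibot.Page, for offline experimentation."""

    def title(self):
        return "نقاش المستخدم:مثال"


manager = HookManager()
manager.add_hook('before', lambda item: print(f"before: {item.title()}"))
manager.add_hook('after', lambda item: print(f"after: {item.title()}"))

job = CompositeJob(hook_manager=manager)
job.add_job(ActionJob())

# Expected output:
#   before: نقاش المستخدم:مثال
#   Performing action on page: نقاش المستخدم:مثال
#   after: نقاش المستخدم:مثال
job.perform(FakePage())
```

Swapping `ActionJob` for a real archiving job later should not require touching `HookManager` or `CompositeJob`; that separation is the point of injecting the Job strategy.

diff --git a/tasks/archiving/run.py b/tasks/archiving/run.py
new file mode 100644
index 00000000..566c1149
--- /dev/null
+++ b/tasks/archiving/run.py
@@ -0,0 +1,35 @@
+import pywikibot
+
+from tasks.archiving.config import USER_CONFIG, TEMPLATE_NAME_KEY
+from tasks.archiving.core.bot import HookManager, CompositeJob, ActionJob, Processor, Job
+
+
+# Concrete implementation for wiki-page processing
+class WikiPageProcessor(Processor):
+    def __init__(self, job: Job):
+        super().__init__(job)
+        self.site = pywikibot.Site('ar', 'wikipedia')
+        self.template_name = USER_CONFIG.get(TEMPLATE_NAME_KEY)
+        self.template_page = pywikibot.Page(self.site, self.template_name)
+
+    def get_items(self):
+        # Keep only direct transclusions whose edit access is not sysop-protected
+        pages = self.template_page.embeddedin()
+        filtered_pages = [
+            page for page in pages
+            if page.depth == 0
+            and not ('edit' in page.protection() and 'sysop' in page.protection()['edit'])
+        ]
+        return filtered_pages
+
+
+if __name__ == '__main__':
+    # Create the HookManager
+    hook_manager = HookManager()
+    # hook_manager.add_hook('before', before_hook)
+    # hook_manager.add_hook('after', after_hook)
+
+    # Create and configure the composite job
+    composite_job = CompositeJob(hook_manager=hook_manager)
+    composite_job.add_job(ActionJob())
+
+    # Create the processor with the composite job
+    processor = WikiPageProcessor(job=composite_job)
+    processor.process_items()

For a cautious first run against the live wiki, the transclusion scan can be bounded before wiring it into `WikiPageProcessor`. A sketch relying on pywikibot's `embeddedin` filters (`namespaces`, `total`); namespace 3 (user talk) and the cap of 10 are illustrative choices, not values from this diff:

```python
import pywikibot

site = pywikibot.Site('ar', 'wikipedia')
template_page = pywikibot.Page(site, 'قالب:أرشيف_آلي')

# Restrict the scan to user-talk pages (namespace 3) and cap it for a dry run
for page in template_page.embeddedin(namespaces=[3], total=10):
    print(page.title(), page.depth)
```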