diff --git a/README.md b/README.md
index e1b1ef1..3c43390 100644
--- a/README.md
+++ b/README.md
@@ -114,6 +114,8 @@ Please find the list of all configuration options, including descriptions, below
 | `REMEMBER_USERS_FOR_HOURS` | `--remember-users-for-hours` | No | How long between back-filling attempts for non-followed accounts? Defaults to `168`, i.e. one week.
 | `HTTP_TIMEOUT` | `--http-timeout` | No | The timeout for any HTTP requests to the Mastodon API in seconds. Defaults to `5`.
 | -- | `--lock-hours` | No | Determines after how many hours a lock file should be discarded. Not relevant when running the script as GitHub Action, as concurrency is prevented using a different mechanism. Recommended value: `24`.
+| -- | `--lock-file` | No | Location for the lock file. If not specified, will use `lock.lock` under the state directory. Not relevant when running the script as GitHub Action.
+| -- | `--state-dir` | No | Directory storing persistent files, and the default location for lock file. Not relevant when running the script as GitHub Action.
 | `ON_START` | `--on-start` | No | Optionally provide a callback URL that will be pinged when processing is starting. A query parameter `rid={uuid}` will automatically be appended to uniquely identify each execution. This can be used to monitor your script using a service such as healthchecks.io.
 | `ON_DONE` | `--on-done` | No | Optionally provide a callback URL that will be called when processing is finished. A query parameter `rid={uuid}` will automatically be appended to uniquely identify each execution. This can be used to monitor your script using a service such as healthchecks.io.
 | `ON_FAIL` | `--on-fail` | No | Optionally provide a callback URL that will be called when processing has failed. A query parameter `rid={uuid}` will automatically be appended to uniquely identify each execution. This can be used to monitor your script using a service such as healthchecks.io.
diff --git a/find_posts.py b/find_posts.py
index 4c9d37f..e5586ed 100644
--- a/find_posts.py
+++ b/find_posts.py
@@ -29,6 +29,8 @@ argparser.add_argument('--http-timeout', required = False, type=int, default=5,
 argparser.add_argument('--backfill-with-context', required = False, type=bool, default=True, help="If enabled, we'll fetch remote replies when backfilling profiles.")
 argparser.add_argument('--backfill-mentioned-users', required = False, type=bool, default=True, help="If enabled, we'll backfill any mentioned users when fetching remote replies to timeline posts.")
 argparser.add_argument('--lock-hours', required = False, type=int, default=24, help="The lock timeout in hours.")
+argparser.add_argument('--lock-file', required = False, default=None, help="Location of the lock file")
+argparser.add_argument('--state-dir', required = False, default="artifacts", help="Directory to store persistent files and possibly lock file")
 argparser.add_argument('--on-done', required = False, default=None, help="Provide a url that will be pinged when processing has completed. You can use this for 'dead man switch' monitoring of your task")
 argparser.add_argument('--on-start', required = False, default=None, help="Provide a url that will be pinged when processing is starting. You can use this for 'dead man switch' monitoring of your task")
 argparser.add_argument('--on-fail', required = False, default=None, help="Provide a url that will be pinged when processing has failed. You can use this for 'dead man switch' monitoring of your task")
@@ -750,7 +752,9 @@ if __name__ == "__main__":
         except Exception as ex:
             log(f"Error getting callback url: {ex}")

-    LOCK_FILE = 'artifacts/lock.lock'
+    if arguments.lock_file is None:
+        arguments.lock_file = os.path.join(arguments.state_dir, 'lock.lock')
+    LOCK_FILE = arguments.lock_file

     if( os.path.exists(LOCK_FILE)):
         log(f"Lock file exists at {LOCK_FILE}")
@@ -785,10 +789,10 @@ if __name__ == "__main__":
     try:
-        SEEN_URLS_FILE = "artifacts/seen_urls"
-        REPLIED_TOOT_SERVER_IDS_FILE = "artifacts/replied_toot_server_ids"
-        KNOWN_FOLLOWINGS_FILE = "artifacts/known_followings"
-        RECENTLY_CHECKED_USERS_FILE = "artifacts/recently_checked_users"
+        SEEN_URLS_FILE = os.path.join(arguments.state_dir, "seen_urls")
+        REPLIED_TOOT_SERVER_IDS_FILE = os.path.join(arguments.state_dir, "replied_toot_server_ids")
+        KNOWN_FOLLOWINGS_FILE = os.path.join(arguments.state_dir, "known_followings")
+        RECENTLY_CHECKED_USERS_FILE = os.path.join(arguments.state_dir, "recently_checked_users")
         seen_urls = OrderedSet([])
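
With these changes applied, a local (non GitHub Actions) run could point both the state directory and the lock file at persistent locations. The invocation below is only a sketch: the `--server` and `--access-token` flags are assumed from the existing CLI, and the paths are placeholders, not values taken from this diff.

    python find_posts.py --server=example.social --access-token="$TOKEN" \
      --state-dir=/var/lib/fedifetcher \
      --lock-file=/run/fedifetcher/lock.lock \
      --lock-hours=24

If `--lock-file` is omitted, the script falls back to `lock.lock` inside the `--state-dir` directory (default `artifacts`), which reproduces the previously hard-coded `artifacts/lock.lock` behaviour.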