cmsmanage/membershipworks/tasks/scrape.py

172 lines
5.8 KiB
Python

import logging
from collections.abc import Iterator
from datetime import datetime, timedelta

from django.conf import settings
from django.db import transaction
from django.db.models import QuerySet

from membershipworks.membershipworks_api import FieldType, MembershipWorks
from membershipworks.models import (
    Event,
    EventCategory,
    EventExt,
    Flag,
    Member,
    Transaction,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Duration threshold used by scrape_events: an event with an end time whose
# length is strictly below this gets a single meeting time created for it
# (or its only existing meeting time updated) to match the event start/end.
MAX_MEETING_TIME = timedelta(hours=6)
def flags_for_member(csv_member, all_flags, folders):
    """Yield every flag in ``all_flags`` that applies to ``csv_member``.

    A flag whose ``type`` is ``"folder"`` applies when the member's
    ``"Account ID"`` is contained in ``folders[flag.id]``. Any other flag
    applies when the member's entry keyed by the flag's name is equal to
    that same name.

    Args:
        csv_member: Mapping of column label to value for a single member.
        all_flags: Iterable of objects exposing ``id``, ``name`` and ``type``.
        folders: Mapping of folder flag id to a container of account ids.

    Yields:
        The matching flags, in the order they appear in ``all_flags``.
    """
    for candidate in all_flags:
        if candidate.type != "folder":
            # labels/levels/addons: the member's column holds the flag name
            applies = csv_member[candidate.name] == candidate.name
        else:
            # folders: membership is looked up by account id
            applies = csv_member["Account ID"] in folders[candidate.id]
        if applies:
            yield candidate
def update_flags(mw_flags) -> Iterator[Flag]:
    """Save a ``Flag`` for every entry of ``mw_flags`` and yield each one.

    This is a generator, so nothing is saved until the result is iterated
    (the previous ``list[Flag]`` annotation did not reflect that).

    Args:
        mw_flags: Mapping of flag type key to a mapping of flag name to
            flag id.

    Yields:
        Each ``Flag`` immediately after it has been saved.
    """
    for plural_type, flags_of_type in mw_flags.items():
        # The last character of the type key is dropped, so e.g. the
        # "folders" key yields the "folder" type that flags_for_member tests.
        flag_type = plural_type[:-1]
        for name, flag_id in flags_of_type.items():
            flag = Flag(id=flag_id, name=name, type=flag_type)
            flag.save()
            yield flag
def scrape_members(membershipworks: MembershipWorks):
    """Synchronise flags, folder membership and members into the database.

    Steps, in order:

    1. Save every flag reported by ``membershipworks._parse_flags()``.
    2. Fetch the member ids belonging to each folder.
    3. Fetch all members, convert checkbox columns to booleans, validate and
       save each ``Member`` and replace its set of flags.

    Args:
        membershipworks: Client used for every API call; this function does
            not log in itself.
    """
    logger.info("Updating flags (labels, levels, and addons)")
    # Parse the flag definitions once and reuse them for flags and folders.
    mw_flags = membershipworks._parse_flags()
    flags = list(update_flags(mw_flags))

    logger.info("Getting folder membership")
    folders = {
        folder_id: membershipworks.get_member_ids([folder_name])
        for folder_name, folder_id in mw_flags["folders"].items()
    }

    logger.info("Getting/Updating members...")
    members = membershipworks.get_all_members()

    # Labels of all checkbox fields, collected once instead of per member.
    # A set also guarantees each label is converted at most once per member:
    # converting twice would compare the already-converted True against "Y"
    # and flip it to False.
    checkbox_labels = {
        field["lbl"]
        for field in membershipworks._all_fields().values()
        if field.get("typ") == FieldType.CHECKBOX.value
    }

    for csv_member in members:
        # convert checkboxes to real booleans
        for label in checkbox_labels:
            if label in csv_member:
                csv_member[label] = csv_member[label] == "Y"

        # create/update member
        member = Member.from_api_dict(csv_member)
        member.clean_fields()
        member.save()
        member.flags.set(flags_for_member(csv_member, flags, folders))
def scrape_transactions(membershipworks: MembershipWorks):
    """Fetch transactions newer than the latest stored one and save them.

    The transactions are requested twice for the same date range, once in
    the default (CSV) form and once with ``json=True``, and the two result
    sets are merged row by row. Every merged row is checked for consistency
    before anything is saved.

    Args:
        membershipworks: Client used for the API calls; this function does
            not log in itself.

    Raises:
        ValueError: If the two exports differ in length, or if a merged row's
            "Account ID"/"Payment ID" do not match its "uid"/"sid".
    """
    now = datetime.now()
    last_transaction = Transaction.objects.order_by("timestamp").last()
    if last_transaction is None:
        # nothing stored yet: start from a fixed early date
        start_date = datetime(2010, 1, 1)
    else:
        # technically this has the potential to lose
        # transactions, but it should be incredibly unlikely
        start_date = last_transaction.timestamp + timedelta(seconds=1)

    logger.info(f"Getting/Updating transactions since {start_date}...")

    transactions_csv = membershipworks.get_transactions(start_date, now)
    transactions_json = membershipworks.get_transactions(start_date, now, json=True)

    # this is terrible, but as long as the dates are the same, should be fiiiine
    # strict=True turns a length mismatch into an error instead of silently
    # dropping the unmatched tail of the longer export.
    transactions = [
        {**csv_row, **json_row}
        for csv_row, json_row in zip(transactions_csv, transactions_json, strict=True)
    ]

    # Validate every row before saving any. This is an explicit raise rather
    # than an assert so the check still runs under ``python -O``.
    for merged in transactions:
        if merged["Account ID"] != merged.get("uid", "") or merged[
            "Payment ID"
        ] != merged.get("sid", ""):
            raise ValueError(
                "CSV and JSON transaction exports are misaligned: "
                f"Account ID {merged['Account ID']!r} / "
                f"Payment ID {merged['Payment ID']!r} do not match "
                f"uid {merged.get('uid', '')!r} / sid {merged.get('sid', '')!r}"
            )

    for merged in transactions:
        Transaction.from_api_dict(merged).save()
@transaction.atomic
def scrape_membershipworks(*args, **options):
    """Log in to MembershipWorks and scrape members, then transactions.

    The function is wrapped in ``transaction.atomic``. Positional and
    keyword arguments are accepted but not used.
    """
    client = MembershipWorks()
    username = settings.MEMBERSHIPWORKS_USERNAME
    password = settings.MEMBERSHIPWORKS_PASSWORD
    client.login(username, password)

    # members first, then transactions, sharing the same logged-in client
    for scrape in (scrape_members, scrape_transactions):
        scrape(client)
def scrape_event_details(queryset: QuerySet[EventExt]):
    """Fetch and store the MembershipWorks details of each event in ``queryset``.

    A new client is created and logged in with the credentials from Django
    settings; each event's ``details`` is then replaced with the result of
    ``get_event_by_eid`` for its ``eid`` and the event is saved.

    Args:
        queryset: The events to refresh.
    """
    client = MembershipWorks()
    username = settings.MEMBERSHIPWORKS_USERNAME
    password = settings.MEMBERSHIPWORKS_PASSWORD
    client.login(username, password)

    for event_ext in queryset:
        fetched_details = client.get_event_by_eid(event_ext.eid)
        event_ext.details = fetched_details
        event_ext.save()
def scrape_events():
    """Scrape event categories and events from MembershipWorks.

    Steps, in order:

    1. Log in with the credentials from Django settings.
    2. Fetch the event list (with categories) from timestamp 0 up to 52
       weeks from now.
    3. Validate and save every event category.
    4. Upsert all events in bulk.
    5. For every event, make sure an ``EventExt`` row exists, keep a single
       meeting time in sync for short events, and (re)fetch the event details
       when they are missing or older than the event itself.
    """
    membershipworks = MembershipWorks()
    membershipworks.login(
        settings.MEMBERSHIPWORKS_USERNAME, settings.MEMBERSHIPWORKS_PASSWORD
    )

    data = membershipworks.get_events_list(
        datetime.fromtimestamp(0), datetime.now() + timedelta(weeks=52), categories=True
    )
    # ``data`` is a mapping (it is indexed with "_st" and "evt" below), so
    # the event count is the length of its "evt" entry, not of ``data``.
    logger.info(f"{len(data['evt'])} events retrieved!")

    # the position of a category in the list is used as its id
    for category_id, category_data in enumerate(data["_st"]["evg"]):
        category = EventCategory.from_api_dict(category_id, category_data)
        category.clean_fields()
        category.save()

    events = Event.objects.bulk_create(
        [Event.from_api_dict(event_data) for event_data in data["evt"]],
        update_conflicts=True,
        # on conflict, update every concrete, non-generated, non-key column
        update_fields=[
            field.attname
            for field in Event._meta.get_fields()
            if not (
                field.auto_created
                or field.many_to_many
                or not field.concrete
                or field.generated
                or field.primary_key
            )
        ],
    )

    for event in events:
        try:
            event_ext = EventExt.objects.get(event_ptr=event)
        except EventExt.DoesNotExist:
            event_ext = EventExt(event_ptr=event)
            # create extension model instance
            # NOTE(review): raw=True presumably writes only the extension
            # row and leaves the just-upserted parent untouched; confirm.
            event_ext.save_base(raw=True)
            event_ext.refresh_from_db()

        if (
            event_ext.end is not None
            and event_ext.end - event_ext.start < MAX_MEETING_TIME
        ):
            meeting_times_count = event_ext.meeting_times.count()
            if meeting_times_count == 0:
                event_ext.meeting_times.create(start=event_ext.start, end=event_ext.end)
            # if there is exactly one meeting time, it should match the event start/end
            elif meeting_times_count == 1:
                event_ext.meeting_times.update(start=event_ext.start, end=event_ext.end)

        # event has no details, or last retrieval was before the event happened
        if event_ext.details is None or event_ext.details_timestamp < (
            event_ext.end or event_ext.start
        ):
            event_ext.details = membershipworks.get_event_by_eid(event.eid)
            event_ext.save()