cmsmanage/membershipworks/tasks/scrape.py

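"""Tasks that scrape MembershipWorks members, transactions, and events into local models."""
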
import logging
from collections.abc import Iterable
from datetime import datetime, timedelta

from django.conf import settings
from django.db import transaction
from django.db.models import QuerySet

from cmsmanage.django_q2_helper import q_task_group
from membershipworks.membershipworks_api import FieldType, MembershipWorks
from membershipworks.models import (
    Event,
    EventCategory,
    EventExt,
    Flag,
    Member,
    Transaction,
)

logger = logging.getLogger(__name__)

MAX_MEETING_TIME = timedelta(hours=6)


def flags_for_member(csv_member, all_flags, folders):
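    """Yield the flags that apply to a member row from the MembershipWorks export.

    Folder flags are matched via the pre-fetched ``folders`` mapping of folder ID
    to member account IDs; other flags are matched by the member column of the
    same name.
    """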
    for flag in all_flags:
        if flag.type == "folder":
            if csv_member["Account ID"] in folders[flag.id]:
                yield flag
        elif csv_member[flag.name] == flag.name:
            yield flag


def update_flags(mw_flags) -> Iterable[Flag]:
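    """Save a Flag for each flag parsed from MembershipWorks, yielding each saved Flag."""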
    for typ, flags_of_type in mw_flags.items():
        for name, flag_id in flags_of_type.items():
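            # flag types from the API are plural ("folders", "labels", ...);
            # strip the trailing "s" to store the singular form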
            flag = Flag(id=flag_id, name=name, type=typ[:-1])
            flag.save()
            yield flag


def scrape_members(membershipworks: MembershipWorks):
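    """Create or update local Member records (and their flags) from MembershipWorks."""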
logger.info("Updating flags (labels, levels, and addons)")
flags = list(update_flags(membershipworks._parse_flags()))
logger.info("Getting folder membership")
folders = {
folder_id: membershipworks.get_member_ids([folder_name])
for folder_name, folder_id in membershipworks._parse_flags()["folders"].items()
}
logger.info("Getting/Updating members...")
members = membershipworks.get_all_members()
for csv_member in members:
        for field in membershipworks._all_fields().values():
            # convert checkboxes to real booleans
            if (
                field.get("typ") == FieldType.CHECKBOX.value
                and field["lbl"] in csv_member
            ):
                csv_member[field["lbl"]] = csv_member[field["lbl"]] == "Y"

        # create/update member
        member = Member.from_api_dict(csv_member)
        member.clean_fields()
        member.save()
        member.flags.set(flags_for_member(csv_member, flags, folders))


def scrape_transactions(membershipworks: MembershipWorks):
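    """Scrape all transactions since 2010 and upsert them into the Transaction table."""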
    now = datetime.now()
    start_date = datetime(2010, 1, 1)
    transactions_csv = membershipworks.get_transactions(start_date, now)
    transactions_json = membershipworks.get_transactions(start_date, now, json=True)

    # this is terrible, but as long as the dates are the same, should be fiiiine
    transactions_data = [
        {**j, **v} for j, v in zip(transactions_csv, transactions_json, strict=True)
    ]
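
    # sanity check: the CSV and JSON exports should describe the same transactions, row for row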
    assert all(
t["Account ID"] == t.get("uid", "") and t["Payment ID"] == t.get("sid", "")
for t in transactions_data
)
transactions = [
Transaction.from_api_dict(transaction) for transaction in transactions_data
]
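    # upsert: existing transactions are matched on (sid, timestamp) and their fields updated in place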
    Transaction.objects.bulk_create(
        transactions,
        update_conflicts=True,
        update_fields=(
            "member",
            "type",
            "sum",
            "fee",
            "event_id",
            "for_what",
            "items",
            "discount_code",
            "note",
            "name",
            "contact_person",
            "full_address",
            "street",
            "city",
            "state_province",
            "postal_code",
            "country",
            "phone",
            "email",
        ),
        unique_fields=("sid", "timestamp"),
    )


@q_task_group("Scrape MembershipWorks Data")
@transaction.atomic
def scrape_membershipworks(*args, **options):
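    """Log into MembershipWorks and scrape members and transactions in one atomic transaction."""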
    membershipworks = MembershipWorks()
    membershipworks.login(
        settings.MEMBERSHIPWORKS_USERNAME, settings.MEMBERSHIPWORKS_PASSWORD
    )
    scrape_members(membershipworks)
    scrape_transactions(membershipworks)


def scrape_event_details(queryset: QuerySet[EventExt]):
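    """Re-fetch details and registrations from MembershipWorks for each event in the queryset."""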
    membershipworks = MembershipWorks()
    membershipworks.login(
        settings.MEMBERSHIPWORKS_USERNAME, settings.MEMBERSHIPWORKS_PASSWORD
    )
    for event in queryset:
        event.details = membershipworks.get_event_by_eid(event.eid)
        event.registrations = membershipworks.get_event_registrations(event.eid)
        event.save()


@q_task_group("Scrape MembershipWorks Events")
def scrape_events():
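    """Scrape event categories and events from MembershipWorks and sync them into the local models."""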
    membershipworks = MembershipWorks()
    membershipworks.login(
        settings.MEMBERSHIPWORKS_USERNAME, settings.MEMBERSHIPWORKS_PASSWORD
    )
    data = membershipworks.get_events_list(
        datetime.fromtimestamp(0), datetime.now() + timedelta(weeks=52), categories=True
    )
logger.info(f"{len(data)} events retrieved!")
    for category_id, category_data in enumerate(data["_st"]["evg"]):
        category = EventCategory.from_api_dict(category_id, category_data)
        category.clean_fields()
        category.save()

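    # upsert events on eid, updating every concrete field that isn't
    # auto-created, generated, many-to-many, or the primary key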
    events = Event.objects.bulk_create(
        [Event.from_api_dict(event_data) for event_data in data["evt"]],
        update_conflicts=True,
        unique_fields=["eid"],
        update_fields=[
            field.attname
            for field in Event._meta.get_fields()
            if not (
                field.auto_created
                or field.many_to_many
                or not field.concrete
                or field.generated
                or field.primary_key
            )
        ],
    )

    for event in events:
        try:
            event_ext = EventExt.objects.get(event_ptr=event)
        except EventExt.DoesNotExist:
            event_ext = EventExt(event_ptr=event)
            # create extension model instance
            event_ext.save_base(raw=True)
            event_ext.refresh_from_db()
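
        # for events short enough to be a single session, keep one auto-managed
        # meeting time in sync; events with several meeting times are left alone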
        if (
            event_ext.end is not None
            and event_ext.end > event_ext.start
            and event_ext.end - event_ext.start < MAX_MEETING_TIME
        ):
            meeting_times_count = event_ext.meeting_times.count()
            if meeting_times_count == 0:
                event_ext.meeting_times.create(start=event_ext.start, end=event_ext.end)
            # if there is exactly one meeting time, it should match the event start/end
            elif meeting_times_count == 1:
                event_ext.meeting_times.update(start=event_ext.start, end=event_ext.end)

        # event has no details, or last retrieval was before the event happened
        if event_ext.details is None or event_ext.details_timestamp < (
            event_ext.end or event_ext.start
        ):
            event_ext.details = membershipworks.get_event_by_eid(event.eid)
            event_ext.registrations = membershipworks.get_event_registrations(event.eid)
            event_ext.save()

    # delete all events that did not occur in the event list
    EventExt.objects.exclude(pk__in=events).delete()