membershipworks: Use bulk upsert when scraping transactions

Instead of trying to do fiddly things with relative times, which turns
out not to work very well. This will break if anyone changes the sid
or timestamp of a transaction though (because of course
MembershipWorks allows editing those).
Adam Goldsmith 2024-09-02 12:47:26 -04:00
parent b056eb04ed
commit 62e48c6e6f
3 changed files with 52 additions and 11 deletions
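
The upsert itself is Django's bulk_create(update_conflicts=True, ...), available since Django 4.1. A minimal sketch of the pattern, using a hypothetical Row model rather than anything from this repo:

from datetime import datetime

from django.db import models


class Row(models.Model):
    # Hypothetical stand-in for Transaction; (key, stamp) mirrors (sid, timestamp).
    key = models.CharField(max_length=32)
    stamp = models.DateTimeField()
    value = models.IntegerField()

    class Meta:
        constraints = [
            models.UniqueConstraint("key", "stamp", name="unique_key_stamp")
        ]


# One query: rows whose (key, stamp) already exists are updated in place,
# everything else is inserted.  On PostgreSQL/SQLite this renders roughly as
#   INSERT ... ON CONFLICT (key, stamp) DO UPDATE SET value = EXCLUDED.value
Row.objects.bulk_create(
    [Row(key="p_1", stamp=datetime(2024, 9, 2), value=1)],
    update_conflicts=True,
    update_fields=("value",),
    unique_fields=("key", "stamp"),
)

Because the database resolves the conflicts, re-fetching the full date range on every scrape can replace the fiddly relative-time bookkeeping this commit removes.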

@@ -0,0 +1,18 @@
+# Generated by Django 5.1 on 2024-09-02 16:29
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("membershipworks", "0002_historical_member_and_flags"),
+    ]
+
+    operations = [
+        migrations.AddConstraint(
+            model_name="transaction",
+            constraint=models.UniqueConstraint(
+                models.F("sid"), models.F("timestamp"), name="unique_sid_timestamp"
+            ),
+        ),
+    ]
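
One operational note, not from the diff: AddConstraint fails if rows violating the new constraint already exist, so it can be worth checking for duplicates before deploying. A hedged sketch, assuming Transaction is importable from the app's models module:

from django.db.models import Count

from membershipworks.models import Transaction  # assumed import path

# Any (sid, timestamp) pair appearing more than once would make the
# unique constraint (and therefore this migration) fail to apply.
dupes = (
    Transaction.objects.values("sid", "timestamp")
    .annotate(n=Count("pk"))
    .filter(n__gt=1)
)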

@@ -373,6 +373,9 @@ class Transaction(BaseModel):
     class Meta:
         db_table = "transactions"
+        constraints = [
+            models.UniqueConstraint("sid", "timestamp", name="unique_sid_timestamp")
+        ]

     def __str__(self):
         return f"{self.type} [{self.member if self.member else self.name}] {self.timestamp}"


@@ -70,27 +70,47 @@ def scrape_members(membershipworks: MembershipWorks):
 def scrape_transactions(membershipworks: MembershipWorks):
     now = datetime.now()
     start_date = datetime(2010, 1, 1)
-    last_transaction = Transaction.objects.order_by("timestamp").last()
-    if last_transaction is not None:
-        # technically this has the potential to lose
-        # transactions, but it should be incredibly unlikely
-        start_date = last_transaction.timestamp + timedelta(seconds=1)
-    logger.info(f"Getting/Updating transactions since {start_date}...")
-
     transactions_csv = membershipworks.get_transactions(start_date, now)
     transactions_json = membershipworks.get_transactions(start_date, now, json=True)
     # this is terrible, but as long as the dates are the same, should be fiiiine
-    transactions = [
+    transactions_data = [
         {**j, **v} for j, v in zip(transactions_csv, transactions_json, strict=True)
     ]
     assert all(
         t["Account ID"] == t.get("uid", "") and t["Payment ID"] == t.get("sid", "")
-        for t in transactions
+        for t in transactions_data
     )
-    for csv_transaction in transactions:
-        Transaction.from_api_dict(csv_transaction).save()
+    transactions = [
+        Transaction.from_api_dict(transaction) for transaction in transactions_data
+    ]
+    Transaction.objects.bulk_create(
+        transactions,
+        update_conflicts=True,
+        update_fields=(
+            "member",
+            "type",
+            "sum",
+            "fee",
+            "event_id",
+            "for_what",
+            "items",
+            "discount_code",
+            "note",
+            "name",
+            "contact_person",
+            "full_address",
+            "street",
+            "city",
+            "state_province",
+            "postal_code",
+            "country",
+            "phone",
+            "email",
+        ),
+        unique_fields=("sid", "timestamp"),
+    )


 @q_task_group("Scrape MembershipWorks Data")
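
One maintenance hazard with this approach: every mutable column has to be kept in update_fields by hand, so a newly added model field would silently stop being refreshed on re-scrape. A hedged alternative sketch that derives the tuple from the model instead (assumes Transaction has no many-to-many fields):

# Every concrete, non-key column; new model fields are picked up
# automatically instead of needing a manual edit to update_fields.
update_fields = tuple(
    f.name
    for f in Transaction._meta.get_fields()
    if f.concrete and not f.primary_key and f.name not in ("sid", "timestamp")
)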