enhance(backend): improve hashtags/trend performance

This commit is contained in:
syuilo
2023-10-07 12:05:17 +09:00
parent 3a7558f36c
commit 71edc65d0d
4 changed files with 121 additions and 102 deletions

View File

@@ -3,29 +3,13 @@
* SPDX-License-Identifier: AGPL-3.0-only
*/
import { Brackets } from 'typeorm';
import { Inject, Injectable } from '@nestjs/common';
import { Endpoint } from '@/server/api/endpoint-base.js';
import type { NotesRepository } from '@/models/_.js';
import type { MiNote } from '@/models/Note.js';
import { safeForSql } from '@/misc/safe-for-sql.js';
import { normalizeForSearch } from '@/misc/normalize-for-search.js';
import { MetaService } from '@/core/MetaService.js';
import { DI } from '@/di-symbols.js';
/*
トレンドに載るためには「『直近a分間のユニーク投稿数が今からa分前今からb分前の間のユニーク投稿数のn倍以上』のハッシュタグの上位5位以内に入る」ことが必要
ユニーク投稿数とはそのハッシュタグと投稿ユーザーのペアのカウントで、例えば同じユーザーが複数回同じハッシュタグを投稿してもそのハッシュタグのユニーク投稿数は1とカウントされる
..が理想だけどPostgreSQLでどうするのか分からないので単に「直近Aの内に投稿されたユニーク投稿数が多いハッシュタグ」で妥協する
*/
const rangeA = 1000 * 60 * 60; // 60分
//const rangeB = 1000 * 60 * 120; // 2時間
//const coefficient = 1.25; // 「n倍」の部分
//const requiredUsers = 3; // 最低何人がそのタグを投稿している必要があるか
const max = 5;
import { FeaturedService } from '@/core/FeaturedService.js';
import { HashtagService } from '@/core/HashtagService.js';
export const meta = {
tags: ['hashtags'],
@@ -71,98 +55,22 @@ export const paramDef = {
@Injectable()
export default class extends Endpoint<typeof meta, typeof paramDef> { // eslint-disable-line import/no-default-export
constructor(
@Inject(DI.notesRepository)
private notesRepository: NotesRepository,
private metaService: MetaService,
private featuredService: FeaturedService,
private hashtagService: HashtagService,
) {
super(meta, paramDef, async () => {
const instance = await this.metaService.fetch(true);
const hiddenTags = instance.hiddenTags.map(t => normalizeForSearch(t));
const now = new Date(); // 5分単位で丸めた現在日時
now.setMinutes(Math.round(now.getMinutes() / 5) * 5, 0, 0);
const ranking = await this.featuredService.getHashtagsRanking(10);
const tagNotes = await this.notesRepository.createQueryBuilder('note')
.where('note.createdAt > :date', { date: new Date(now.getTime() - rangeA) })
.andWhere(new Brackets(qb => { qb
.where('note.visibility = \'public\'')
.orWhere('note.visibility = \'home\'');
}))
.andWhere('note.tags != \'{}\'')
.select(['note.tags', 'note.userId'])
.cache(60000) // 1 min
.getMany();
const charts = ranking.length === 0 ? {} : await this.hashtagService.getCharts(ranking, 20);
if (tagNotes.length === 0) {
return [];
}
const tags: {
name: string;
users: MiNote['userId'][];
}[] = [];
for (const note of tagNotes) {
for (const tag of note.tags) {
if (hiddenTags.includes(tag)) continue;
const x = tags.find(x => x.name === tag);
if (x) {
if (!x.users.includes(note.userId)) {
x.users.push(note.userId);
}
} else {
tags.push({
name: tag,
users: [note.userId],
});
}
}
}
// タグを人気順に並べ替え
const hots = tags
.sort((a, b) => b.users.length - a.users.length)
.map(tag => tag.name)
.slice(0, max);
//#region 2(または3)で話題と判定されたタグそれぞれについて過去の投稿数グラフを取得する
const countPromises: Promise<number[]>[] = [];
const range = 20;
// 10分
const interval = 1000 * 60 * 10;
for (let i = 0; i < range; i++) {
countPromises.push(Promise.all(hots.map(tag => this.notesRepository.createQueryBuilder('note')
.select('count(distinct note.userId)')
.where(`'{"${safeForSql(tag) ? tag : 'aichan_kawaii'}"}' <@ note.tags`)
.andWhere('note.createdAt < :lt', { lt: new Date(now.getTime() - (interval * i)) })
.andWhere('note.createdAt > :gt', { gt: new Date(now.getTime() - (interval * (i + 1))) })
.cache(60000) // 1 min
.getRawOne()
.then(x => parseInt(x.count, 10)),
)));
}
const countsLog = await Promise.all(countPromises);
//#endregion
const totalCounts = await Promise.all(hots.map(tag => this.notesRepository.createQueryBuilder('note')
.select('count(distinct note.userId)')
.where(`'{"${safeForSql(tag) ? tag : 'aichan_kawaii'}"}' <@ note.tags`)
.andWhere('note.createdAt > :gt', { gt: new Date(now.getTime() - rangeA) })
.cache(60000 * 60) // 60 min
.getRawOne()
.then(x => parseInt(x.count, 10)),
));
const stats = hots.map((tag, i) => ({
const stats = ranking.map((tag, i) => ({
tag,
chart: countsLog.map(counts => counts[i]),
usersCount: totalCounts[i],
chart: charts[tag],
usersCount: Math.max(...charts[tag]),
}));
return stats;