Commit 6aff4237 authored by nanahira

check text in tweet content

parent 777a3302
Pipeline #14762 passed with stages
in 3 minutes and 36 seconds
......@@ -6,6 +6,7 @@ import { HamiFetcherService } from './hami-fetcher/hami-fetcher.service';
import { OcrService } from './ocr/ocr.service';
import { BlacklistService } from './blacklist/blacklist.service';
import { BlacklistController } from './blacklist/blacklist.controller';
import { MatchService } from './match/match.service';
@Module({
imports: [
......@@ -21,7 +22,7 @@ import { BlacklistController } from './blacklist/blacklist.controller';
configService.get('http'),
}),
],
providers: [HamiFetcherService, OcrService, BlacklistService],
providers: [HamiFetcherService, OcrService, BlacklistService, MatchService],
controllers: [BlacklistController],
})
export class AppModule {}
......@@ -3,10 +3,15 @@ import { HamiFetcherService } from '../hami-fetcher/hami-fetcher.service';
import { OcrService } from '../ocr/ocr.service';
import { HamiData, HamiResult, TwintData } from '../dto/hami.dto';
import _ from 'lodash';
import { MatchService } from 'src/match/match.service';
@Injectable()
export class BlacklistService extends ConsoleLogger {
constructor(private fetcher: HamiFetcherService, private ocr: OcrService) {
constructor(
private fetcher: HamiFetcherService,
private ocr: OcrService,
private match: MatchService,
) {
super('BlacklistService');
}
async fetchBlacklist(since: string) {
......@@ -24,9 +29,12 @@ export class BlacklistService extends ConsoleLogger {
async checkTweet(data: TwintData): Promise<HamiData> {
const urls = data.photos || [];
const accounts = (
await Promise.all(urls.map((url) => this.ocr.checkImage(url)))
).flat();
const accounts = [
...(
await Promise.all(urls.map((url) => this.ocr.checkImage(url)))
).flat(),
...this.match.matchText(data.tweet),
];
if (accounts.length) {
this.log(`Found account ${accounts.join(', ')} in tweet ${data.link}`);
}
......
import { Test, TestingModule } from '@nestjs/testing';
import { MatchService } from './match.service';
// Smoke test: MatchService can be resolved from a Nest testing module.
describe('MatchService', () => {
  let service: MatchService;

  // Build a fresh testing module before every test so no state is shared.
  beforeEach(async () => {
    const builder = Test.createTestingModule({
      providers: [MatchService],
    });
    const moduleRef: TestingModule = await builder.compile();
    service = moduleRef.get<MatchService>(MatchService);
  });

  it('should be defined', () => {
    expect(service).toBeDefined();
  });
});
import { Injectable } from '@nestjs/common';
@Injectable()
/**
 * Extracts candidate account numbers (apparently QQ IDs, judging by the
 * `QQ:` label the pattern looks for) from free-form text.
 */
export class MatchService {
  /**
   * A number explicitly labelled `QQ:` — 8 to 10 digits. The trailing
   * `(?!\d)` rejects longer digit runs instead of silently truncating them
   * to their first 10 digits.
   */
  private static readonly labelledPattern = /QQ:\s*\d{8,10}(?!\d)/g;

  /**
   * An unlabelled, standalone run of 9 or 10 digits. The look-around
   * assertions keep the pattern from matching slices of a longer number
   * (e.g. a tweet status ID inside a URL or an 11-digit phone number).
   */
  private static readonly barePattern = /(?<!\d)\d{9,10}(?!\d)/g;

  /**
   * Finds account numbers in `text`.
   *
   * Labelled matches (`QQ: 12345678`) take precedence: when at least one is
   * present, only those are returned, with the `QQ:` prefix stripped.
   * Otherwise every standalone 9–10 digit run is returned.
   *
   * @param text Text to scan; an empty (or otherwise falsy) value yields `[]`.
   * @returns The matched digit strings in order of appearance, possibly empty.
   */
  matchText(text: string): string[] {
    if (!text) {
      return [];
    }
    const labelled = text.match(MatchService.labelledPattern);
    if (labelled) {
      return labelled.map((info) => info.replace(/^QQ:\s*/, ''));
    }
    return text.match(MatchService.barePattern) ?? [];
  }
}
......@@ -4,10 +4,15 @@ import { lastValueFrom } from 'rxjs';
import { recognize } from 'node-tesseract-ocr';
import PQueue from 'p-queue';
import { ConfigService } from '@nestjs/config';
import { MatchService } from 'src/match/match.service';
@Injectable()
export class OcrService extends ConsoleLogger {
constructor(private http: HttpService, private config: ConfigService) {
constructor(
private http: HttpService,
private config: ConfigService,
private match: MatchService,
) {
super('OcrService');
}
......@@ -24,14 +29,7 @@ export class OcrService extends ConsoleLogger {
}),
);
const text = await recognize(data);
if (!text) {
return [];
}
const exactMatches = text.match(/QQ:\s*\d{8,10}/g);
if (exactMatches) {
return exactMatches.map((info) => info.replace(/^QQ:\s*/, ''));
}
return text.match(/\d{9,10}/g) || [];
return this.match.matchText(text);
} catch (e) {
this.error(`Failed to check url ${url}: ${e.message}`);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment