Commit 6aff4237 authored by nanahira

check text in tweet content

parent 777a3302
Pipeline #14762 passed with stages
in 3 minutes and 36 seconds
...@@ -6,6 +6,7 @@ import { HamiFetcherService } from './hami-fetcher/hami-fetcher.service'; ...@@ -6,6 +6,7 @@ import { HamiFetcherService } from './hami-fetcher/hami-fetcher.service';
import { OcrService } from './ocr/ocr.service'; import { OcrService } from './ocr/ocr.service';
import { BlacklistService } from './blacklist/blacklist.service'; import { BlacklistService } from './blacklist/blacklist.service';
import { BlacklistController } from './blacklist/blacklist.controller'; import { BlacklistController } from './blacklist/blacklist.controller';
import { MatchService } from './match/match.service';
@Module({ @Module({
imports: [ imports: [
...@@ -21,7 +22,7 @@ import { BlacklistController } from './blacklist/blacklist.controller'; ...@@ -21,7 +22,7 @@ import { BlacklistController } from './blacklist/blacklist.controller';
configService.get('http'), configService.get('http'),
}), }),
], ],
providers: [HamiFetcherService, OcrService, BlacklistService], providers: [HamiFetcherService, OcrService, BlacklistService, MatchService],
controllers: [BlacklistController], controllers: [BlacklistController],
}) })
export class AppModule {} export class AppModule {}
...@@ -3,10 +3,15 @@ import { HamiFetcherService } from '../hami-fetcher/hami-fetcher.service'; ...@@ -3,10 +3,15 @@ import { HamiFetcherService } from '../hami-fetcher/hami-fetcher.service';
import { OcrService } from '../ocr/ocr.service'; import { OcrService } from '../ocr/ocr.service';
import { HamiData, HamiResult, TwintData } from '../dto/hami.dto'; import { HamiData, HamiResult, TwintData } from '../dto/hami.dto';
import _ from 'lodash'; import _ from 'lodash';
import { MatchService } from 'src/match/match.service';
@Injectable() @Injectable()
export class BlacklistService extends ConsoleLogger { export class BlacklistService extends ConsoleLogger {
constructor(private fetcher: HamiFetcherService, private ocr: OcrService) { constructor(
private fetcher: HamiFetcherService,
private ocr: OcrService,
private match: MatchService,
) {
super('BlacklistService'); super('BlacklistService');
} }
async fetchBlacklist(since: string) { async fetchBlacklist(since: string) {
...@@ -24,9 +29,12 @@ export class BlacklistService extends ConsoleLogger { ...@@ -24,9 +29,12 @@ export class BlacklistService extends ConsoleLogger {
async checkTweet(data: TwintData): Promise<HamiData> { async checkTweet(data: TwintData): Promise<HamiData> {
const urls = data.photos || []; const urls = data.photos || [];
const accounts = ( const accounts = [
await Promise.all(urls.map((url) => this.ocr.checkImage(url))) ...(
).flat(); await Promise.all(urls.map((url) => this.ocr.checkImage(url)))
).flat(),
...this.match.matchText(data.tweet),
];
if (accounts.length) { if (accounts.length) {
this.log(`Found account ${accounts.join(', ')} in tweet ${data.link}`); this.log(`Found account ${accounts.join(', ')} in tweet ${data.link}`);
} }
......
import { Test, TestingModule } from '@nestjs/testing';
import { MatchService } from './match.service';
// Smoke test: verifies that MatchService can be resolved from a Nest testing
// module that registers it as its only provider.
describe('MatchService', () => {
  let service: MatchService;

  // Build a fresh testing module before every test case.
  beforeEach(async () => {
    const builder = Test.createTestingModule({ providers: [MatchService] });
    const testingModule: TestingModule = await builder.compile();
    service = testingModule.get<MatchService>(MatchService);
  });

  it('should be defined', () => {
    expect(service).toBeDefined();
  });
});
import { Injectable } from '@nestjs/common';
@Injectable()
/**
 * Extracts account numbers from free-form text: numbers labelled with a
 * `QQ:` prefix are preferred, otherwise bare digit runs are used.
 */
export class MatchService {
  /**
   * A `QQ:` label followed by 8-10 digits. The trailing `(?!\d)` rejects
   * longer digit runs so that a number is never reported truncated.
   */
  private static readonly labelledAccount = /QQ:\s*\d{8,10}(?!\d)/g;

  /** Strips the `QQ:` label (and following whitespace) from a labelled hit. */
  private static readonly labelPrefix = /^QQ:\s*/;

  /**
   * A stand-alone run of 9-10 digits. The digit guards on both sides keep
   * longer numbers (11-digit phone numbers, long numeric ids, ...) from being
   * sliced into 9-10 digit fragments and reported as accounts.
   */
  private static readonly bareAccount = /(?<!\d)\d{9,10}(?!\d)/g;

  /**
   * Finds account numbers in `text`.
   *
   * @param text - Text to scan; any falsy value (empty string, or
   *   `undefined`/`null` slipping through at runtime) yields an empty result.
   * @returns The digits of every `QQ:`-labelled number when at least one is
   *   present; otherwise every stand-alone 9-10 digit number; otherwise `[]`.
   */
  matchText(text: string): string[] {
    if (!text) {
      return [];
    }

    // Labelled numbers win: when any exist, bare digit runs are ignored.
    const labelled = text.match(MatchService.labelledAccount);
    if (labelled) {
      return labelled.map((hit) => hit.replace(MatchService.labelPrefix, ''));
    }

    return text.match(MatchService.bareAccount) ?? [];
  }
}
...@@ -4,10 +4,15 @@ import { lastValueFrom } from 'rxjs'; ...@@ -4,10 +4,15 @@ import { lastValueFrom } from 'rxjs';
import { recognize } from 'node-tesseract-ocr'; import { recognize } from 'node-tesseract-ocr';
import PQueue from 'p-queue'; import PQueue from 'p-queue';
import { ConfigService } from '@nestjs/config'; import { ConfigService } from '@nestjs/config';
import { MatchService } from 'src/match/match.service';
@Injectable() @Injectable()
export class OcrService extends ConsoleLogger { export class OcrService extends ConsoleLogger {
constructor(private http: HttpService, private config: ConfigService) { constructor(
private http: HttpService,
private config: ConfigService,
private match: MatchService,
) {
super('OcrService'); super('OcrService');
} }
...@@ -24,14 +29,7 @@ export class OcrService extends ConsoleLogger { ...@@ -24,14 +29,7 @@ export class OcrService extends ConsoleLogger {
}), }),
); );
const text = await recognize(data); const text = await recognize(data);
if (!text) { return this.match.matchText(text);
return [];
}
const exactMatches = text.match(/QQ:\s*\d{8,10}/g);
if (exactMatches) {
return exactMatches.map((info) => info.replace(/^QQ:\s*/, ''));
}
return text.match(/\d{9,10}/g) || [];
} catch (e) { } catch (e) {
this.error(`Failed to check url ${url}: ${e.message}`); this.error(`Failed to check url ${url}: ${e.message}`);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment