Commit 2a4c8634 authored by nanahira's avatar nanahira

finish

parents
# compiled output
/dist
/node_modules
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# OS
.DS_Store
# Tests
/coverage
/.nyc_output
# IDEs and editors
/.idea
.project
.classpath
.c9/
*.launch
.settings/
*.sublime-workspace
# IDE - VSCode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
/data
/output
/config.yaml
.git*
Dockerfile
.dockerignore
/eng.traineddata
// ESLint configuration for the project: parses TypeScript with
// @typescript-eslint/parser and layers the typescript-eslint and Prettier
// "recommended" presets on top.
module.exports = {
parser: '@typescript-eslint/parser',
parserOptions: {
// Point the parser at the project's tsconfig, resolved relative to this file.
project: 'tsconfig.json',
tsconfigRootDir : __dirname,
sourceType: 'module',
},
plugins: ['@typescript-eslint/eslint-plugin'],
extends: [
'plugin:@typescript-eslint/recommended',
'plugin:prettier/recommended',
],
// Marks this file as the root config for the repository.
root: true,
env: {
node: true,
jest: true,
},
// Exclude this config file itself from linting.
ignorePatterns: ['.eslintrc.js'],
// Rules from the presets that are switched off for this project.
rules: {
'@typescript-eslint/interface-name-prefix': 'off',
'@typescript-eslint/explicit-function-return-type': 'off',
'@typescript-eslint/explicit-module-boundary-types': 'off',
'@typescript-eslint/no-explicit-any': 'off',
},
};
# compiled output
/dist
/node_modules
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# OS
.DS_Store
# Tests
/coverage
/.nyc_output
# IDEs and editors
/.idea
.project
.classpath
.c9/
*.launch
.settings/
*.sublime-workspace
# IDE - VSCode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
/data
/output
/config.yaml
/eng.traineddata
stages:
- build
- combine
- deploy
variables:
GIT_DEPTH: "1"
CONTAINER_TEST_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
CONTAINER_TEST_ARM_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG-arm
CONTAINER_TEST_X86_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG-x86
CONTAINER_RELEASE_IMAGE: $CI_REGISTRY_IMAGE:latest
before_script:
- docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
build-x86:
stage: build
tags:
- docker
script:
- TARGET_IMAGE=$CONTAINER_TEST_X86_IMAGE
- docker build --pull -t $TARGET_IMAGE .
- docker push $TARGET_IMAGE
build-arm:
stage: build
tags:
- docker-arm
script:
- TARGET_IMAGE=$CONTAINER_TEST_ARM_IMAGE
- docker build --pull -t $TARGET_IMAGE .
- docker push $TARGET_IMAGE
combine:
stage: combine
tags:
- docker
script:
- TARGET_IMAGE=$CONTAINER_TEST_IMAGE
- SOURCE_IMAGE_2=$CONTAINER_TEST_ARM_IMAGE
- SOURCE_IMAGE_1=$CONTAINER_TEST_X86_IMAGE
- docker pull $SOURCE_IMAGE_1
- docker pull $SOURCE_IMAGE_2
- docker manifest create $TARGET_IMAGE --amend $SOURCE_IMAGE_1 --amend
$SOURCE_IMAGE_2
- docker manifest push $TARGET_IMAGE
deploy_latest:
stage: deploy
tags:
- docker
script:
- TARGET_IMAGE=$CONTAINER_RELEASE_IMAGE
- SOURCE_IMAGE=$CONTAINER_TEST_IMAGE
- docker pull $SOURCE_IMAGE
- docker tag $SOURCE_IMAGE $TARGET_IMAGE
- docker push $TARGET_IMAGE
only:
- master
deploy_tag:
stage: deploy
tags:
- docker
script:
- TARGET_IMAGE=$CI_REGISTRY_IMAGE:$CI_COMMIT_TAG
- SOURCE_IMAGE=$CONTAINER_TEST_IMAGE
- docker pull $SOURCE_IMAGE
- docker tag $SOURCE_IMAGE $TARGET_IMAGE
- docker push $TARGET_IMAGE
only:
- tags
/install-npm.sh
.git*
/data
/output
/config.yaml
.idea
.dockerignore
Dockerfile
/src
/eng.traineddata
{
"singleQuote": true,
"trailingComma": "all"
}
\ No newline at end of file
# Shared base stage: Node runtime plus the system packages needed to install
# native npm modules (python3, build-essential) and to run OCR (tesseract-ocr).
FROM node:lts-bullseye-slim AS base
LABEL Author="Nanahira <nanahira@momobako.com>"
# apt-get is used instead of apt because apt's CLI is not meant for scripts.
RUN apt-get update && apt-get -y install python3 build-essential tesseract-ocr && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /var/log/*
WORKDIR /usr/src/app
COPY ./package*.json ./

# Build stage: install all dependencies (including dev) and compile to ./dist.
FROM base AS builder
RUN npm ci && npm cache clean --force
COPY . ./
RUN npm run build

# Runtime stage: install with NODE_ENV=production and copy only the compiled
# output, the example configuration and the OCR language data.
FROM base
ENV NODE_ENV=production
RUN npm ci && npm cache clean --force
COPY --from=builder /usr/src/app/dist ./dist
COPY ./config.example.yaml ./config.yaml
COPY ./lang-data ./
EXPOSE 3000
CMD [ "npm", "run", "start:prod" ]
This diff is collapsed.
# Wenyuanwall Fetch
Data fetcher for https://twitter.com/Hamimelon7/
host: '::'
port: 3000
hamiUsername: 'Hamimelon7'
hamiBlacklistSearch: '#蔷蔷挂人板'
twintUrl: 'http://twint'
twintToken: 'twintToken'
#!/bin/bash
# Adds the extra npm packages this project uses on top of the Nest scaffold.

# Runtime dependencies: validation/transformation helpers, Swagger support,
# the Nest config module and a YAML parser.
npm install --save \
class-validator \
class-transformer \
@nestjs/swagger \
@nestjs/config \
yaml
# Development-only dependency: Express type definitions.
npm install --save-dev \
@types/express
{
"collection": "@nestjs/schematics",
"sourceRoot": "src",
"compilerOptions": {
"plugins": ["@nestjs/swagger"]
}
}
This diff is collapsed.
{
"name": "wenyuanwall-fetch",
"version": "0.0.1",
"description": "",
"author": "",
"private": true,
"license": "UNLICENSED",
"scripts": {
"prebuild": "rimraf dist",
"build": "nest build",
"format": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"",
"start": "nest start",
"start:dev": "nest start --watch",
"start:debug": "nest start --debug --watch",
"start:prod": "node dist/main",
"lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix",
"test": "jest",
"test:watch": "jest --watch",
"test:cov": "jest --coverage",
"test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand",
"test:e2e": "jest --config ./test/jest-e2e.json"
},
"dependencies": {
"@nestjs/axios": "^0.1.0",
"@nestjs/common": "^9.0.0",
"@nestjs/config": "^2.2.0",
"@nestjs/core": "^9.0.0",
"@nestjs/platform-express": "^9.0.0",
"@nestjs/swagger": "^6.0.3",
"class-transformer": "^0.5.1",
"class-validator": "^0.13.2",
"lodash": "^4.17.21",
"node-tesseract-ocr": "^2.2.1",
"p-queue": "6.6.2",
"reflect-metadata": "^0.1.13",
"rimraf": "^3.0.2",
"rxjs": "^7.2.0",
"yaml": "^2.1.1"
},
"devDependencies": {
"@nestjs/cli": "^9.0.0",
"@nestjs/schematics": "^9.0.0",
"@nestjs/testing": "^9.0.0",
"@types/express": "^4.17.13",
"@types/jest": "28.1.4",
"@types/lodash": "^4.14.182",
"@types/node": "^16.0.0",
"@types/supertest": "^2.0.11",
"@typescript-eslint/eslint-plugin": "^5.0.0",
"@typescript-eslint/parser": "^5.0.0",
"axios": "^0.27.2",
"eslint": "^8.0.1",
"eslint-config-prettier": "^8.3.0",
"eslint-plugin-prettier": "^4.0.0",
"jest": "28.1.2",
"prettier": "^2.3.2",
"source-map-support": "^0.5.20",
"supertest": "^6.1.3",
"ts-jest": "28.0.5",
"ts-loader": "^9.2.3",
"ts-node": "^10.0.0",
"tsconfig-paths": "4.0.0",
"typescript": "^4.3.5"
},
"jest": {
"moduleFileExtensions": [
"js",
"json",
"ts"
],
"rootDir": "src",
"testRegex": ".*\\.spec\\.ts$",
"transform": {
"^.+\\.(t|j)s$": "ts-jest"
},
"collectCoverageFrom": [
"**/*.(t|j)s"
],
"coverageDirectory": "../coverage",
"testEnvironment": "node"
}
}
import { Module } from '@nestjs/common';
import { ConfigModule, ConfigService } from '@nestjs/config';
import { loadConfig } from './utility/config';
import { HttpModule } from '@nestjs/axios';
import { HamiFetcherService } from './hami-fetcher/hami-fetcher.service';
import { OcrService } from './ocr/ocr.service';
import { BlacklistService } from './blacklist/blacklist.service';
import { BlacklistController } from './blacklist/blacklist.controller';
/**
 * Root module of the application.
 *
 * Registers the global configuration (loaded through `loadConfig`), an HTTP
 * client whose options come from the `http` configuration entry, the
 * fetcher/OCR/blacklist services and the blacklist controller.
 */
@Module({
  controllers: [BlacklistController],
  providers: [HamiFetcherService, OcrService, BlacklistService],
  imports: [
    ConfigModule.forRoot({
      isGlobal: true,
      load: [loadConfig],
    }),
    HttpModule.registerAsync({
      imports: [ConfigModule],
      inject: [ConfigService],
      // The HTTP client options are whatever is stored under the `http` key.
      useFactory: async (configService: ConfigService) => {
        return configService.get('http');
      },
    }),
  ],
})
export class AppModule {}
import { Test, TestingModule } from '@nestjs/testing';
import { BlacklistController } from './blacklist.controller';
/**
 * Smoke test: the controller can be instantiated by the Nest testing module.
 */
describe('BlacklistController', () => {
  let controller: BlacklistController;

  beforeEach(async () => {
    const module: TestingModule = await Test.createTestingModule({
      controllers: [BlacklistController],
    })
      // The controller's constructor requires a BlacklistService that is not
      // registered in this module; without a stand-in, compile() rejects with
      // a dependency-resolution error. Any unresolved dependency is replaced
      // by an empty stub, which is enough for an instantiation check.
      .useMocker(() => ({}))
      .compile();
    controller = module.get<BlacklistController>(BlacklistController);
  });

  it('should be defined', () => {
    expect(controller).toBeDefined();
  });
});
import { Controller, Get, Query } from '@nestjs/common';
import { BlacklistService } from './blacklist.service';
import { ApiOkResponse, ApiOperation, ApiQuery } from '@nestjs/swagger';
import { ReturnMessageDto } from '../dto/ReturnMessage.dto';
import { HamiResult } from '../dto/hami.dto';
/** Response envelope class whose `data` payload is an array of HamiResult. */
const HamiResultDto = ReturnMessageDto([HamiResult]);

/**
 * Controller mounted at the `blacklist` route; delegates to BlacklistService.
 */
@Controller('blacklist')
export class BlacklistController {
  constructor(private blacklist: BlacklistService) {}

  /**
   * Handles GET requests: forwards the `since` query value to the service and
   * wraps whatever it returns in a 200 / 'success' envelope.
   *
   * @param since value of the `since` query parameter, passed through as-is
   * @returns the envelope containing the fetched blacklist entries
   */
  @Get()
  @ApiOperation({ summary: '获取黑名单' })
  @ApiQuery({ name: 'since', description: '获取黑名单的起始时间' })
  @ApiOkResponse({ type: HamiResultDto })
  async getBlacklist(@Query('since') since: string) {
    const entries = await this.blacklist.fetchBlacklist(since);
    const response = new HamiResultDto(200, 'success', entries);
    return response;
  }
}
import { Test, TestingModule } from '@nestjs/testing';
import { BlacklistService } from './blacklist.service';
/**
 * Smoke test: the service can be instantiated by the Nest testing module.
 */
describe('BlacklistService', () => {
  let service: BlacklistService;

  beforeEach(async () => {
    const module: TestingModule = await Test.createTestingModule({
      providers: [BlacklistService],
    })
      // BlacklistService injects two other services that are not registered
      // here; without stand-ins, compile() rejects with a
      // dependency-resolution error. Empty stubs are sufficient because this
      // suite only checks that the service can be constructed.
      .useMocker(() => ({}))
      .compile();
    service = module.get<BlacklistService>(BlacklistService);
  });

  it('should be defined', () => {
    expect(service).toBeDefined();
  });
});
import { ConsoleLogger, Injectable } from '@nestjs/common';
import { HamiFetcherService } from '../hami-fetcher/hami-fetcher.service';
import { OcrService } from '../ocr/ocr.service';
import { HamiData, HamiResult, TwintData } from '../dto/hami.dto';
import _ from 'lodash';
/**
 * Combines the tweet fetcher and the OCR service: fetches blacklist tweets,
 * scans their photos for account numbers and keeps the tweets where at least
 * one account was found.
 */
@Injectable()
export class BlacklistService extends ConsoleLogger {
  constructor(private fetcher: HamiFetcherService, private ocr: OcrService) {
    super('BlacklistService');
  }

  /**
   * Fetches the blacklist tweets and returns those with detected accounts.
   *
   * @param since forwarded unchanged to the fetcher
   * @returns one HamiResult per tweet that has a non-empty `accounts` list
   */
  async fetchBlacklist(since: string) {
    const tweets = await this.fetcher.fetchBlacklist(since);
    this.log(`Fetched ${tweets.length} tweets of blacklist.`);

    // Start the checks for all tweets first, then wait for all of them.
    const pendingChecks = tweets.map((tweet) => this.checkTweet(tweet));
    const checkedTweets = await Promise.all(pendingChecks);

    const result: HamiResult[] = [];
    for (const checked of checkedTweets) {
      if (checked.accounts?.length) {
        result.push(HamiResult.fromData(checked));
      }
    }
    this.log(`Found ${result.length} valid tweets.`);
    return result;
  }

  /**
   * Runs OCR on every photo of one tweet and attaches the accounts found.
   *
   * @param data the tweet to inspect; a missing `photos` list is treated as empty
   * @returns a copy of the tweet with a de-duplicated `accounts` array
   */
  async checkTweet(data: TwintData): Promise<HamiData> {
    const urls = data.photos || [];
    const perImage = await Promise.all(
      urls.map((url) => this.ocr.checkImage(url)),
    );
    const accounts = perImage.flat();
    if (accounts.length) {
      this.log(`Found account ${accounts.join(', ')} in tweet ${data.link}`);
    }
    const accountsUnique = _.uniq(accounts);
    return { ...data, accounts: accountsUnique };
  }
}
import { ApiProperty } from '@nestjs/swagger';
import { HttpException } from '@nestjs/common';
/**
 * Envelope fields shared by response messages: a status code, a message text
 * and a success flag.
 */
export interface BlankReturnMessage {
statusCode: number;
message: string;
success: boolean;
}
/**
 * Response envelope that can additionally carry an optional payload of type T.
 */
export interface ReturnMessage<T> extends BlankReturnMessage {
data?: T;
}
/**
 * Payload-less response envelope, documented for Swagger via ApiProperty.
 */
export class BlankReturnMessageDto implements BlankReturnMessage {
  @ApiProperty({ description: '返回状态' })
  statusCode: number;

  @ApiProperty({ description: '返回信息' })
  message: string;

  @ApiProperty({ description: '是否成功' })
  success: boolean;

  /**
   * @param statusCode status code of the response; values below 400 count as success
   * @param message message text; falls back to 'success' when omitted or empty
   */
  constructor(statusCode: number, message?: string) {
    const isSuccess = statusCode < 400;
    this.statusCode = statusCode;
    this.message = message ? message : 'success';
    this.success = isSuccess;
  }

  /**
   * Wraps this envelope in an HttpException that uses the envelope as the
   * response body and `statusCode` as the HTTP status.
   */
  toException() {
    const exception = new HttpException(this, this.statusCode);
    return exception;
  }
}
/** Any constructor, regardless of its arguments or instance type. */
type AnyClass = new (...args: any[]) => any;

/** Either a constructor or a one-element tuple holding a constructor. */
type ClassOrArray = AnyClass | [AnyClass];

/** Instance type produced by a constructor type, or `never` for non-constructors. */
type TypeFromClass<T> = T extends new (...args: any[]) => infer U ? U : never;

/**
 * Maps a ClassOrArray to its value type: `[C]` becomes an array of C
 * instances, a bare `C` becomes a single C instance.
 */
export type ParseType<T extends ClassOrArray> = T extends [infer U]
  ? TypeFromClass<U>[]
  : TypeFromClass<T>;

/**
 * Returns the constructor itself, unwrapping it from the array form if needed.
 */
function getClass(o: ClassOrArray) {
  if (o instanceof Array) {
    return o[0];
  }
  return o;
}
/**
 * Builds a response-envelope class whose `data` field holds the given type.
 *
 * The generated class extends BlankReturnMessageDto, has its `data` property
 * registered with ApiProperty, and is renamed to
 * `<ClassName>ReturnMessageDto` after the wrapped class.
 *
 * @param type a class, or a one-element array of a class for list payloads
 * @returns the generated envelope class
 */
export function ReturnMessageDto<T extends ClassOrArray>(type: T) {
  const envelopeClass = class SpecificReturnMessage extends BlankReturnMessageDto {
    data?: ParseType<T>;

    constructor(statusCode: number, message?: string, data?: ParseType<T>) {
      super(statusCode, message);
      this.data = data;
    }
  };

  // Apply the property decorator by hand, since `type` is only known here.
  const describeData = ApiProperty({ description: '返回数据', type });
  describeData(envelopeClass.prototype, 'data');

  const generatedName = `${getClass(type).name}ReturnMessageDto`;
  Object.defineProperty(envelopeClass, 'name', { value: generatedName });

  return envelopeClass;
}
/**
 * Shape of the response body expected from the Twint service: a list of tweet
 * records plus message, status code and success flag.
 */
export interface TwintReturn {
data: TwintData[];
message: string;
statusCode: number;
success: boolean;
}
/**
 * One tweet record as carried in `TwintReturn.data`.
 * NOTE(review): the field list presumably mirrors the Twint tweet schema;
 * confirm against the Twint service before relying on individual fields.
 */
export interface TwintData {
cashtags: string[];
conversation_id: string;
datestamp: string;
datetime: string;
geo: string;
hashtags: string[];
id: number;
id_str: string;
lang: string;
likes_count: number;
link: string;
mentions: string[];
name: string;
near: string;
photos: string[];
place: string;
quote_url: string;
replies_count: number;
reply_to: string[];
retweet: boolean;
retweet_date: string;
retweet_id: string;
retweets_count: number;
source: string;
thumbnail: string;
timestamp: string;
timezone: string;
trans_dest: string;
trans_src: string;
translate: string;
tweet: string;
urls: string[];
user_id: number;
user_id_str: string;
user_rt: string;
user_rt_id: string;
username: string;
video: number;
}
/** A tweet record extended with the list of account strings attached to it. */
export interface HamiData extends TwintData {
accounts: string[];
}
/**
 * Class form of HamiData, so that tweet records with accounts can be used
 * where a class (rather than an interface) is required.
 */
export class HamiResult implements HamiData {
  cashtags: string[];
  conversation_id: string;
  datestamp: string;
  datetime: string;
  geo: string;
  hashtags: string[];
  id: number;
  id_str: string;
  lang: string;
  likes_count: number;
  link: string;
  mentions: string[];
  name: string;
  near: string;
  photos: string[];
  place: string;
  quote_url: string;
  replies_count: number;
  reply_to: string[];
  retweet: boolean;
  retweet_date: string;
  retweet_id: string;
  retweets_count: number;
  source: string;
  thumbnail: string;
  timestamp: string;
  timezone: string;
  trans_dest: string;
  trans_src: string;
  translate: string;
  tweet: string;
  urls: string[];
  user_id: number;
  user_id_str: string;
  user_rt: string;
  user_rt_id: string;
  username: string;
  video: number;
  accounts: string[];

  /**
   * Creates a HamiResult and shallow-copies every own enumerable property of
   * `data` onto it.
   *
   * @param data the record to copy from
   * @returns a new HamiResult instance carrying the same property values
   */
  static fromData(data: HamiData): HamiResult {
    return Object.assign(new HamiResult(), data);
  }
}
import { Test, TestingModule } from '@nestjs/testing';
import { HamiFetcherService } from './hami-fetcher.service';
/**
 * Smoke test: the service can be instantiated by the Nest testing module.
 */
describe('HamiFetcherService', () => {
  let service: HamiFetcherService;

  beforeEach(async () => {
    const module: TestingModule = await Test.createTestingModule({
      providers: [HamiFetcherService],
    })
      // The service injects an HTTP client and a config service that are not
      // registered here; without stand-ins, compile() rejects with a
      // dependency-resolution error. The stub exposes `get` because the
      // service reads configuration values while it is being constructed.
      .useMocker(() => ({ get: () => undefined }))
      .compile();
    service = module.get<HamiFetcherService>(HamiFetcherService);
  });

  it('should be defined', () => {
    expect(service).toBeDefined();
  });
});
import { ConsoleLogger, Injectable } from '@nestjs/common';
import { HttpService } from '@nestjs/axios';
import { TwintData, TwintReturn } from '../dto/hami.dto';
import { ConfigService } from '@nestjs/config';
import { lastValueFrom } from 'rxjs';
import { AxiosRequestConfig } from 'axios';
/**
 * Client for the Twint HTTP service that supplies the raw blacklist tweets.
 *
 * The service URL, bearer token, account name and search term are all read
 * from the application configuration.
 */
@Injectable()
export class HamiFetcherService extends ConsoleLogger {
  private readonly twintUrl: string = this.config.get('twintUrl');
  private readonly twintToken: string = this.config.get('twintToken');
  private readonly hamiUsername: string = this.config.get('hamiUsername');
  private readonly hamiBlacklistSearch: string = this.config.get(
    'hamiBlacklistSearch',
  );

  constructor(private http: HttpService, private config: ConfigService) {
    super('HamiFetcherService');
  }

  /**
   * Request options shared by calls to the Twint service: bearer
   * authorization, a 600000 ms timeout and a JSON response type.
   */
  axiosConfig(): AxiosRequestConfig {
    return {
      headers: {
        Authorization: `Bearer ${this.twintToken}`,
      },
      timeout: 600000,
      responseType: 'json',
    };
  }

  /**
   * Queries the Twint `/Search` endpoint for the configured account and
   * search term.
   *
   * Failures are logged and reported as an empty list rather than thrown, so
   * callers always receive an array.
   *
   * @param Since optional lower bound, forwarded unchanged to the service
   * @returns the tweets returned by the service, or `[]` on error
   */
  async fetchBlacklist(Since?: string): Promise<TwintData[]> {
    try {
      const { data } = await lastValueFrom(
        this.http.post<TwintReturn>(
          `${this.twintUrl}/Search`,
          {
            // Use the configured values instead of hard-coded literals so
            // that the `hamiUsername` / `hamiBlacklistSearch` settings take
            // effect.
            Username: this.hamiUsername,
            Search: this.hamiBlacklistSearch,
            Since,
          },
          this.axiosConfig(),
        ),
      );
      // Guard against a response envelope without a data array.
      return data.data ?? [];
    } catch (e) {
      this.error(`Failed to fetch blacklist since ${Since}: ${e.message}`);
      return [];
    }
  }
}
import { NestFactory } from '@nestjs/core';
import { SwaggerModule, DocumentBuilder } from '@nestjs/swagger';
import { NestExpressApplication } from '@nestjs/platform-express';
import { AppModule } from './app.module';
import { ConfigService } from '@nestjs/config';
/**
 * Creates the Nest application, enables CORS, configures the trusted proxies
 * and the `api` global prefix, publishes the Swagger document under `docs`,
 * and starts listening on the configured port and host (defaults: 3000, '::').
 */
async function bootstrap() {
  const app = await NestFactory.create<NestExpressApplication>(AppModule);
  app.enableCors();
  app.set('trust proxy', ['172.16.0.0/12', 'loopback']);
  app.setGlobalPrefix('api');

  const swaggerOptions = new DocumentBuilder()
    .setTitle('wenyuanwall-fetch')
    .setDescription('文援墙挂人采集器。')
    .setVersion('1.0')
    .build();
  SwaggerModule.setup(
    'docs',
    app,
    SwaggerModule.createDocument(app, swaggerOptions),
  );

  const config = app.get(ConfigService);
  const port = config.get<number>('port') || 3000;
  const host = config.get<string>('host') || '::';
  await app.listen(port, host);
}
bootstrap();
import { Test, TestingModule } from '@nestjs/testing';
import { OcrService } from './ocr.service';
/**
 * Smoke test: the service can be instantiated by the Nest testing module.
 */
describe('OcrService', () => {
  let service: OcrService;

  beforeEach(async () => {
    const module: TestingModule = await Test.createTestingModule({
      providers: [OcrService],
    })
      // OcrService injects an HTTP client that is not registered here;
      // without a stand-in, compile() rejects with a dependency-resolution
      // error. An empty stub is enough for an instantiation check.
      .useMocker(() => ({}))
      .compile();
    service = module.get<OcrService>(OcrService);
  });

  it('should be defined', () => {
    expect(service).toBeDefined();
  });
});
import { ConsoleLogger, Injectable } from '@nestjs/common';
import { HttpService } from '@nestjs/axios';
import { lastValueFrom } from 'rxjs';
import { recognize } from 'node-tesseract-ocr';
import PQueue from 'p-queue';
import * as os from 'os';
/**
 * Downloads images and extracts account numbers from them via Tesseract OCR.
 *
 * Work is funnelled through a queue so that only a bounded number of
 * recognitions run at the same time.
 */
@Injectable()
export class OcrService extends ConsoleLogger {
  constructor(private http: HttpService) {
    super('OcrService');
  }

  // One concurrent job per reported CPU; at least one, because the queue
  // cannot operate with a concurrency of zero.
  private queue = new PQueue({
    concurrency: Math.max(1, os.cpus().length),
  });

  /**
   * Downloads one image, runs OCR on it and extracts account numbers.
   *
   * Numbers labelled `QQ: ` (5-10 digits) take precedence; when none are
   * present, any bare 8-10 digit run is returned instead.
   *
   * @param url address of the image to download
   * @returns the extracted numbers, or `[]` when nothing is found or the
   *   download/recognition fails (the failure is logged)
   */
  private async checkImageProcess(url: string): Promise<string[]> {
    const label = 'QQ: ';
    try {
      const { data } = await lastValueFrom(
        this.http.get<Buffer>(url, {
          responseType: 'arraybuffer',
          timeout: 30000,
        }),
      );
      const text = await recognize(data);
      if (!text) {
        return [];
      }
      const exactMatches = text.match(/QQ: \d{5,10}/g);
      if (exactMatches) {
        // Strip the whole label, including the space after the colon, so the
        // result contains digits only.
        return exactMatches.map((info) => info.slice(label.length));
      }
      return text.match(/\d{8,10}/g) || [];
    } catch (e) {
      this.error(`Failed to check url ${url}: ${e.message}`);
      // Keep the declared contract: a failed image contributes no accounts
      // instead of an `undefined` entry in the caller's list.
      return [];
    }
  }

  /**
   * Schedules OCR processing of an image on the shared queue.
   *
   * @param url address of the image to check
   * @returns a promise for the numbers extracted from the image
   */
  checkImage(url: string) {
    return this.queue.add(() => this.checkImageProcess(url));
  }
}
import yaml from 'yaml';
import * as fs from 'fs';
import { HttpModuleOptions } from '@nestjs/axios';
/**
 * Application settings, assembled from built-in defaults and `config.yaml`.
 */
export interface Config {
// Address and port for the HTTP server.
host: string;
port: number;
// Optional options for the HTTP client module.
http?: HttpModuleOptions;
// NOTE(review): presumably the account name and search term used for the
// blacklist query — confirm against the fetcher service.
hamiUsername: string;
hamiBlacklistSearch: string;
// Base URL and token of the Twint service.
twintUrl: string;
twintToken: string;
}
// Fallback values used for every setting that config.yaml does not provide.
const defaultConfig: Config = {
host: '::',
port: 3000,
http: {},
hamiUsername: 'Hamimelon7',
hamiBlacklistSearch: '#蔷蔷挂人板',
twintUrl: 'http://twint',
twintToken: 'twintToken',
};
/**
 * Loads the application configuration.
 *
 * Reads `./config.yaml` (relative to the working directory), parses it as
 * YAML and overlays the result on the defaults. If the file cannot be read
 * or parsed, the problem is printed to stderr and the defaults are returned.
 *
 * @returns the defaults merged with the values found in the file
 */
export async function loadConfig(): Promise<Config> {
  let overrides: Partial<Config> = {};
  try {
    overrides = yaml.parse(
      await fs.promises.readFile('./config.yaml', 'utf-8'),
    );
  } catch (e) {
    console.error(`Failed to read config: ${e.toString()}`);
  }
  // Later sources win, so values from the file replace the defaults.
  return Object.assign({}, defaultConfig, overrides);
}
import { Test, TestingModule } from '@nestjs/testing';
import { INestApplication } from '@nestjs/common';
import * as request from 'supertest';
import { AppModule } from './../src/app.module';
/**
 * End-to-end smoke test: the full application module can be compiled,
 * initialised and shut down.
 */
describe('AppController (e2e)', () => {
  let app: INestApplication;

  beforeEach(async () => {
    const moduleFixture: TestingModule = await Test.createTestingModule({
      imports: [AppModule],
    }).compile();
    app = moduleFixture.createNestApplication();
    await app.init();
  });

  // Close the application after each test so that nothing created by init()
  // is left open between tests.
  afterEach(async () => {
    await app.close();
  });

  // A suite without any test makes Jest fail, so keep at least this check.
  it('should bootstrap the application', () => {
    expect(app).toBeDefined();
  });
});
{
"moduleFileExtensions": ["js", "json", "ts"],
"rootDir": ".",
"testEnvironment": "node",
"testRegex": ".e2e-spec.ts$",
"transform": {
"^.+\\.(t|j)s$": "ts-jest"
}
}
{
"extends": "./tsconfig.json",
"exclude": ["node_modules", "test", "dist", "**/*spec.ts"]
}
{
"compilerOptions": {
"module": "commonjs",
"declaration": true,
"removeComments": true,
"emitDecoratorMetadata": true,
"experimentalDecorators": true,
"allowSyntheticDefaultImports": true,
"target": "es2021",
"sourceMap": true,
"outDir": "./dist",
"baseUrl": "./",
"incremental": true,
"esModuleInterop": true
},
"compileOnSave": true,
"allowJs": true
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment