Commit bd75b87d authored by nanahira's avatar nanahira

unfinished

parent 6c69e0b6
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# TypeScript v1 declaration files
typings/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
.env.test
# parcel-bundler cache (https://parceljs.org/)
.cache
# Next.js build output
.next
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Uncomment the "public" line below if your project uses Gatsby and *not* Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
/build
/output
.git*
.dockerignore
Dockerfile
.gitlab-ci.yml
/config.yaml
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# TypeScript v1 declaration files
typings/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
.env.test
# parcel-bundler cache (https://parceljs.org/)
.cache
# Next.js build output
.next
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Uncomment the "public" line below if your project uses Gatsby and *not* Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
/build
/output
/config.yaml
/ygopro-database
# Pipeline overview: the `build` stage builds and pushes a per-ref Docker image;
# the `deploy` stage re-tags that image (as `latest` on master, or as the Git tag name on tags).
stages:
- build
- deploy
variables:
# Depth used by the runner when cloning the repository (shallow clone).
GIT_DEPTH: "1"
# Image name for the current ref; produced by `build` and consumed by both deploy jobs.
CONTAINER_TEST_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
# Image name published by `deploy_latest`.
CONTAINER_RELEASE_IMAGE: $CI_REGISTRY_IMAGE:latest
# Runs before every job's script: authenticate against the container registry.
before_script:
- docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
# Build the image from the repository's Dockerfile and push it under the per-ref tag.
build:
stage: build
tags:
- docker
script:
- docker build --pull -t $CONTAINER_TEST_IMAGE .
- docker push $CONTAINER_TEST_IMAGE
# On master only: promote the per-ref image to `latest`.
deploy_latest:
stage: deploy
tags:
- docker
script:
- docker pull $CONTAINER_TEST_IMAGE
- docker tag $CONTAINER_TEST_IMAGE $CONTAINER_RELEASE_IMAGE
- docker push $CONTAINER_RELEASE_IMAGE
only:
- master
# On Git tags only: publish the per-ref image under the tag's name.
deploy_tag:
stage: deploy
tags:
- docker
variables:
CONTAINER_TAG_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_TAG
script:
- docker pull $CONTAINER_TEST_IMAGE
- docker tag $CONTAINER_TEST_IMAGE $CONTAINER_TAG_IMAGE
- docker push $CONTAINER_TAG_IMAGE
only:
- tags
# Base image: the "buster-slim" tag of the official Node.js image (no specific Node version is pinned).
FROM node:buster-slim
# Install python3 and a C/C++ build toolchain, then remove apt lists and temp files in the same layer.
# NOTE(review): presumably needed so npm can compile native addons (e.g. sqlite3) — confirm.
RUN apt update && apt -y install python3 build-essential && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
WORKDIR /usr/src/app
# Copy only the npm manifests first so the dependency-install layer is rebuilt only when they change.
COPY ./package*.json ./
# Clean, lockfile-exact dependency install.
RUN npm ci
# Copy the remaining sources and run the "build" npm script.
COPY . ./
RUN npm run build
# Container entry point: the "start" npm script.
CMD ["npm", "run", "start"]
This diff is collapsed.
# Number of forum index pages to scan for posts (pages 1 through postDepth).
postDepth: 5
# NOTE(review): the three paths below are not read by any code visible in this commit;
# presumably the Japanese and Chinese source card databases and the generated output database — confirm.
jpDatabasePath: ./ygopro-database/locales/ja-JP/cards.cdb
cnDatabasePath: ./ygopro-database/locales/zh-CN/cards.cdb
outputPath: ./output/cn.cdb
This diff is collapsed.
{
"name": "ygopro-cn-database-generator",
"version": "1.0.0",
"description": "Generates CN env database from NW posts.",
"main": "run.ts",
"scripts": {
"build": "tsc",
"start": "node build/run.js"
},
"repository": {
"type": "git",
"url": "git@git.mycard.moe:nanahira/ygopro-cn-database-generator.git"
},
"keywords": [
"YGOPro",
"NW"
],
"author": "Nanahira",
"license": "AGPL-3.0",
"dependencies": {
"@types/bunyan": "^1.8.6",
"@types/node": "^14.11.5",
"@types/sqlite3": "^3.1.6",
"@types/underscore": "^1.10.24",
"axios": "^0.20.0",
"bunyan": "^1.8.14",
"sqlite": "^4.0.15",
"sqlite3": "^5.0.0",
"typescript": "^4.0.3",
"underscore": "^1.11.0",
"yaml": "^1.10.0"
}
}
import fs from "fs";
import yaml from "yaml";
/**
 * Shape of the settings loaded from `config.yaml`.
 */
export interface Config {
  /** How many forum index pages to walk when looking for posts (pages 1..postDepth). */
  postDepth: number;

  /** Path to the Japanese card database file. NOTE(review): not consumed by code visible here — confirm usage. */
  jpDatabasePath: string;

  /** Path to the Chinese card database file. NOTE(review): not consumed by code visible here — confirm usage. */
  cnDatabasePath: string;

  /** Path of the database file to generate. NOTE(review): not consumed by code visible here — confirm usage. */
  outputPath: string;
}
/**
 * Loads the application settings from `./config.yaml`.
 *
 * The file is read as UTF-8 text and parsed as YAML. The parsed value is
 * returned as-is: no validation against the `Config` shape is performed.
 *
 * @returns The parsed configuration.
 * @throws If the file cannot be read or its content is not valid YAML.
 */
export async function loadConfig(): Promise<Config> {
  const rawYaml = await fs.promises.readFile("./config.yaml", "utf-8");
  const parsed = yaml.parse(rawYaml);
  return parsed;
}
import axios from "axios";
import { Config, loadConfig } from "./config";
import bunyan from "bunyan";
import _ from "underscore";
/**
 * Scrapes the forum at bbs.newwise.com for posts whose link text starts with
 * "【简体中文版】" ("Simplified Chinese edition") and extracts the card names
 * listed in those posts.
 *
 * Usage: construct, `await init()`, then `await fetch()`.
 */
export class CNFetcher {
  /**
   * Matches one silver-coloured "※<name>" line of a post body; group 1 is the name.
   * The capture is non-greedy so that several entries on the same line of HTML
   * are returned separately instead of being merged into one match.
   */
  private static readonly CARD_NAME_PATTERN = /<font color="Silver">\u203b(.*?)<\/font><br \/>/g;

  /** Matches a post link on a forum index page; group 1 is the `href` value. */
  private static readonly POST_LINK_PATTERN = /<a href="([^"]+)" [^>]*>【简体中文版】[^<]*<\/a>/g;

  /** Settings loaded by `init()`; unset until then. */
  config: Config;
  log: bunyan;

  /**
   * @param loggerOptions Options forwarded to `bunyan.createLogger`.
   */
  constructor(loggerOptions: bunyan.LoggerOptions) {
    this.log = bunyan.createLogger(loggerOptions);
  }

  /** Loads the configuration. Must complete before `fetchPosts()` / `fetch()` are called. */
  async init(): Promise<void> {
    this.log.debug("Initializing...");
    this.log.debug("Reading config...");
    this.config = await loadConfig();
    this.log.debug("Initialized.");
  }

  /**
   * Downloads `url` and returns the response body.
   *
   * The body is requested as text: the `"document"` response type only exists
   * in browsers, while the callers here run string regexes over the result.
   */
  private async fetchPage(url: string): Promise<string> {
    this.log.debug(`Downloading content from ${url} .`);
    const { data } = await axios.get<string>(url, {
      responseType: "text"
    });
    this.log.debug(`Downloaded content from ${url} .`);
    return data;
  }

  /**
   * Extracts the distinct card names listed in the post at `url`.
   *
   * @param url Address of a single post.
   * @returns Card names in order of first appearance; empty when none are found.
   */
  async getAllStrings(url: string): Promise<string[]> {
    const data = await this.fetchPage(url);
    // matchAll yields the capture group directly, so each entry is matched only once.
    const allCardNames = Array.from(data.matchAll(CNFetcher.CARD_NAME_PATTERN), match => match[1]);
    if (allCardNames.length === 0) {
      return [];
    }
    const uniqueCardNames = [...new Set(allCardNames)];
    this.log.debug(`${uniqueCardNames.length} cards found from ${url} .`);
    return uniqueCardNames;
  }

  /**
   * Walks forum index pages 1..`config.postDepth` and collects the links of
   * matching posts. Index pages are fetched one after another.
   *
   * @returns The `href` values of all matching posts, in page order.
   * @throws Error if `init()` has not been called yet.
   */
  async fetchPosts(): Promise<string[]> {
    if (!this.config) {
      throw new Error("CNFetcher is not initialized; call init() before fetching.");
    }
    const posts: string[] = [];
    for (let page = 1; page <= this.config.postDepth; ++page) {
      const url = `https://bbs.newwise.com/forum-8-${page}.html`;
      this.log.debug(`Fetching pages from ${url} .`);
      const content = await this.fetchPage(url);
      const postsFound = Array.from(content.matchAll(CNFetcher.POST_LINK_PATTERN), match => match[1]);
      if (postsFound.length === 0) {
        this.log.debug(`No pages found from ${url} .`);
        continue;
      }
      // Log the number of links, not the whole array of URLs.
      this.log.debug(`Got ${postsFound.length} pages from ${url} .`);
      posts.push(...postsFound);
    }
    return posts;
  }

  /**
   * Collects the distinct card names from every matching post.
   * All posts are downloaded concurrently.
   *
   * @returns Card names, de-duplicated across posts, in order of first appearance.
   * @throws Error if `init()` has not been called yet, or if any download fails.
   */
  async fetch(): Promise<string[]> {
    this.log.debug(`Started fetching...`);
    const posts = await this.fetchPosts();
    const namesPerPost = await Promise.all(posts.map(post => this.getAllStrings(post)));
    this.log.debug(`Done.`);
    return [...new Set(namesPerPost.flat())];
  }
}
import { CNFetcher } from "../src/fetcher";
/**
 * Manual smoke test: runs a full fetch with debug logging and prints the
 * resulting card names.
 */
async function main(): Promise<void> {
  const fetcher = new CNFetcher({ name: "Test fetch", level: "debug" });
  await fetcher.init();
  const strings = await fetcher.fetch();
  console.log(strings);
}

// Exit explicitly in both outcomes so that a failed fetch is reported and
// yields a non-zero exit code instead of an unhandled promise rejection.
main().then(
  () => process.exit(),
  (error: unknown) => {
    console.error(error);
    process.exit(1);
  }
);
{
  "compilerOptions": {
    "outDir": "build",
    "module": "commonjs",
    "target": "esnext",
    "esModuleInterop": true,
    "sourceMap": true,
    "allowJs": true
  },
  "compileOnSave": true,
  "include": [
    "*.ts",
    "src/*.ts",
    "test/*.ts"
  ]
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment