Commit f9bb0a3a authored by nanahira

save

parent 5997d282
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# TypeScript v1 declaration files
typings/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
.env.test
# parcel-bundler cache (https://parceljs.org/)
.cache
# Next.js build output
.next
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and *not* Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
/build
/output
.git*
.dockerignore
Dockerfile
.gitlab-ci.yml
/config.yaml
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# TypeScript v1 declaration files
typings/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variables file
.env
.env.test
# parcel-bundler cache (https://parceljs.org/)
.cache
# Next.js build output
.next
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and *not* Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
/build
/output
/config.yaml
# Pipeline: build a Docker image for every ref, then re-tag and push it as
# `latest` (on master) or as the git tag name (on tags).
stages:
  - build
  - deploy

variables:
  GIT_DEPTH: "1"
  CONTAINER_TEST_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
  CONTAINER_RELEASE_IMAGE: $CI_REGISTRY_IMAGE:latest

before_script:
  # Feed the password through stdin so it does not appear in the process
  # arguments (and docker does not warn about an insecure `-p`).
  - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"

build:
  stage: build
  tags:
    - docker
  script:
    - docker build --pull -t $CONTAINER_TEST_IMAGE .
    - docker push $CONTAINER_TEST_IMAGE

deploy_latest:
  stage: deploy
  tags:
    - docker
  script:
    - docker pull $CONTAINER_TEST_IMAGE
    - docker tag $CONTAINER_TEST_IMAGE $CONTAINER_RELEASE_IMAGE
    - docker push $CONTAINER_RELEASE_IMAGE
  only:
    - master

deploy_tag:
  stage: deploy
  tags:
    - docker
  variables:
    CONTAINER_TAG_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_TAG
  script:
    - docker pull $CONTAINER_TEST_IMAGE
    - docker tag $CONTAINER_TEST_IMAGE $CONTAINER_TAG_IMAGE
    - docker push $CONTAINER_TAG_IMAGE
  only:
    - tags
# Image that installs dependencies, compiles the TypeScript sources and runs
# the package's `start` script.
FROM node:buster-slim

# Use apt-get rather than apt: apt warns that its CLI is not stable for
# scripted use. The package lists are removed in the same layer.
RUN apt-get update && apt-get -y install python3 && rm -rf /var/lib/apt/lists/*

WORKDIR /usr/src/app

# Copy the manifests first so the dependency layer is reused while only
# source files change.
COPY ./package*.json ./
RUN npm ci

COPY . ./
RUN npm run build

CMD ["npm", "run", "start"]
This diff is collapsed.
MySQLConfig:
host: mysql
user: tx3
database: tx3
password: my_password
smbConfig:
share: game
username: user
domain: WORKGROUP
password: my_password
pathPrefix: "."
fetchCron: "0 1 * * * *"
This diff is collapsed.
{
"name": "tx3-message-reader",
"version": "1.0.0",
"description": "Reads tx3 messages and analyze things.",
"main": "build/index.js",
"scripts": {
"build": "tsc",
"fetch": "node build/src/run.js",
"start": "node build/src/run.js"
},
"repository": {
"type": "git",
"url": "git@git.mycard.moe:nanahira/tx3-message-reader.git"
},
"author": "Nanahira",
"license": "AGPL-3.0",
"dependencies": {
"@marsaud/smb2": "^0.17.1",
"@types/node": "^14.11.2",
"@types/underscore": "^1.10.24",
"iconv-lite": "^0.6.2",
"moment": "^2.29.0",
"posthtml-parser": "^0.5.0",
"promise-mysql": "^4.1.3",
"typescript": "^4.0.3",
"underscore": "^1.11.0",
"yaml": "^1.10.0"
}
}
import fs from "fs";
import yaml from "yaml";
import { PoolConfig } from "promise-mysql";
/**
 * Connection settings for the SMB client. The `smbConfig` field of `Config`
 * is declared with this type.
 * NOTE(review): the field set presumably mirrors the options object of the
 * SMB client library in use — confirm against that library's documentation.
 */
export interface ISMB2Options {
// NOTE(review): presumably the share to open — verify against the SMB client library.
share: string;
username: string;
domain: string;
password: string;
// The remaining fields are optional; their defaults are not visible in this file.
port?: number;
packetConcurrency?: number;
autoCloseTimeout?: number;
debug?: boolean;
}
/**
 * Shape of the application configuration that `loadConfig` returns after
 * parsing `./config.yaml`.
 */
export interface Config {
// Connection pool options, typed as promise-mysql's `PoolConfig`.
MySQLConfig: PoolConfig;
// SMB connection settings.
smbConfig: ISMB2Options;
// NOTE(review): presumably the directory inside the share that is listed for chat logs — confirm against the caller.
pathPrefix: string;
// NOTE(review): looks like a cron expression for scheduled fetching; no consumer is visible in this file — confirm.
fetchCron: string;
}
/**
 * Reads `./config.yaml` (relative to the current working directory) and
 * parses it as YAML.
 *
 * @returns The parsed document, typed as {@link Config}.
 * @throws If the file cannot be read, the YAML is malformed, or the document
 *   is not a mapping at the top level (for example an empty file, which
 *   parses to `null`).
 */
export async function loadConfig(): Promise<Config> {
  const raw = await fs.promises.readFile("./config.yaml", "utf-8");
  const parsed: unknown = yaml.parse(raw);
  // Fail here with a clear message instead of letting a null/scalar document
  // surface later as "cannot read property ... of null" in a caller.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error("config.yaml must contain a YAML mapping at the top level");
  }
  // NOTE(review): individual keys are still not validated against Config.
  return parsed as Config;
}
import iconv from "iconv-lite";
import HTML from "posthtml-parser";
import _, { has } from "underscore";
/**
 * One parsed chat line, as collected by `readChatBuffer`.
 * NOTE(review): `readChatBuffer` initialises `channel` and `content` to
 * `null`, which these `string` declarations do not admit under
 * `strictNullChecks` — confirm whether the types should be widened.
 */
export interface Message {
// Text captured between 【 and 】 in the line's <font> tag.
channel: string;
// Speaker part of the text node; "none" when the captured name is empty.
speaker: string;
// Message part of the text node (the text after the speaker and ": ").
content: string;
}
/**
 * Parses a chat log buffer into a list of messages.
 *
 * The buffer is decoded as GB2312, `#h…#h` spans are stripped, and the result
 * is parsed as HTML. The top-level nodes are then walked in order:
 *  - a text node supplies the speaker and content of the current message;
 *  - a `<font>` tag supplies the channel (the text between 【 and 】);
 *  - a `<br>` tag ends the current message, which is emitted unless an error
 *    was recorded since the previous `<br>`.
 * Any other node is logged and marks the current message as erroneous.
 *
 * @param buffer Raw bytes of the chat log file.
 * @returns The messages that parsed without errors, in file order.
 */
export function readChatBuffer(buffer: Buffer) {
  // NOTE(review): `.*` is greedy, so two `#h…#h` spans on one line also
  // remove the text between them — confirm this is intended.
  const data = iconv.decode(buffer, "GB2312").replace(/#h.*#h/g, "");
  const content = HTML(data);
  const messages: Message[] = [];
  // Accumulator reused across iterations; a copy is pushed on every <br>.
  const curMessage: Message = {
    channel: null,
    speaker: "none",
    content: null
  };
  let hasError = false;
  for (const meta of content) {
    try {
      if (typeof meta === "string") {
        // The speaker capture is lazy so the split happens at the FIRST ": ".
        // A greedy capture splits at the last one and would fold part of a
        // message that itself contains ": " into the speaker name.
        const messageMatch = meta.match(/^ (.*?): (.*)\r\n$/);
        if (!messageMatch) {
          console.error(`Failed to parse message:`, meta);
          hasError = true;
          continue;
        }
        curMessage.speaker = messageMatch[1].length ? messageMatch[1] : "none";
        curMessage.content = messageMatch[2];
      } else if (meta.tag === "font") {
        if (!meta.content) {
          console.error(`Broken tag:`, meta);
          hasError = true;
          continue;
        }
        // A non-string first child makes `.match` throw; the catch below
        // records that as an error for this message.
        const messageMatch = (meta.content[0] as string).match(/^【(.{2,4})】:$/);
        if (!messageMatch) {
          console.error(`Failed to parse tag:`, meta);
          hasError = true;
          continue;
        }
        curMessage.channel = messageMatch[1];
      } else if (meta.tag === "br") {
        if (!hasError) {
          messages.push(_.clone(curMessage));
        } else {
          console.error(`Skipped message ${JSON.stringify(curMessage)} becuase it contains errors.`);
        }
        // Start the next message with a clean error state.
        hasError = false;
      } else {
        console.error(`Unknown tag:`, meta);
        hasError = true;
      }
    } catch (e) {
      // String() also copes with thrown values that have no toString().
      console.error(`Errored parsing tag:`, meta, String(e));
      hasError = true;
    }
  }
  return messages;
}
import { Config, loadConfig } from "./config";
import SMB from "@marsaud/smb2";
import mysql from "promise-mysql";
import moment from "moment";
import iconv from "iconv-lite";
import HTML from "posthtml-parser";
/**
 * Lists chat log files on an SMB share and records in MySQL which of them
 * have been seen.
 *
 * Usage: construct, `await init()`, then `await run()`.
 */
export class SMBReader {
  /** Prefix prepended to every log line written by this instance. */
  logPrefix: string;
  constructor(logPrefix: string) {
    this.logPrefix = logPrefix;
  }
  /** Loaded by `init()`. */
  config: Config;
  /** SMB client, created by `init()` from `config.smbConfig`. */
  smb: SMB;
  /** MySQL connection pool, created by `init()` from `config.MySQLConfig`. */
  db: mysql.Pool;

  /**
   * Loads the configuration, creates the SMB client and the MySQL pool, and
   * creates the `filesRead` and `messages` tables if they do not exist.
   */
  async init() {
    console.error(`${this.logPrefix}Reading config...`);
    this.config = await loadConfig();
    this.smb = new SMB(this.config.smbConfig);
    console.error(`${this.logPrefix}Connecting to database...`);
    this.db = await mysql.createPool(this.config.MySQLConfig);
    console.error(`${this.logPrefix}Creating table...`);
    // `date` is made unique once, via UNIQUE KEY below; repeating UNIQUE on
    // the column definition as well would create a second, duplicate index.
    await this.db.query([
      "CREATE TABLE IF NOT EXISTS `filesRead` (",
      "  `fileid` bigint(20) UNSIGNED NOT NULL AUTO_INCREMENT,",
      "  `date` datetime NOT NULL,",
      "  `filename` varchar(40) COLLATE utf8_unicode_ci NOT NULL,",
      "  PRIMARY KEY (fileid),",
      "  UNIQUE KEY (date),",
      "  UNIQUE KEY (filename)",
      ") ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci"
    ].join("\n"));
    await this.db.query([
      "CREATE TABLE IF NOT EXISTS `messages` (",
      "  `id` bigint(20) UNSIGNED NOT NULL AUTO_INCREMENT,",
      "  `fileid` bigint(20) UNSIGNED NOT NULL,",
      "  `channel` varchar(4) COLLATE utf8_unicode_ci NOT NULL,",
      "  `speaker` varchar(16) COLLATE utf8_unicode_ci NOT NULL,",
      "  `content` text COLLATE utf8_unicode_ci NOT NULL,",
      "  PRIMARY KEY (id),",
      "  FOREIGN KEY (fileid) REFERENCES filesRead(fileid),",
      "  KEY (channel(4)),",
      "  INDEX (speaker(16))",
      ") ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci"
    ].join("\n"));
    console.error(`${this.logPrefix}Initialization finished.`);
  }

  /**
   * Lists `chat_*.html` files under `config.pathPrefix`, orders them by the
   * timestamp embedded in their names, and inserts a `filesRead` row for each
   * file that is not recorded yet.
   */
  async run() {
    console.error(`${this.logPrefix}Reading file list...`);
    const namePattern = /^chat_(.*)\.html$/;
    const entries = await this.smb.readdir(this.config.pathPrefix);
    const files = [];
    for (const filename of entries) {
      // Match once and reuse the capture instead of filtering and re-matching.
      const nameMatch = filename.match(namePattern);
      if (!nameMatch) {
        continue;
      }
      // File names carry the time as HH-mm-ss; restore the colons.
      const dateString = nameMatch[1].replace(/-/g, ":");
      // Pass the format explicitly (e.g. "Fri Oct 02 00:03:56 2020"): the
      // string is neither ISO 8601 nor RFC 2822, so a bare moment(dateString)
      // would rely on the deprecated fallback to the JS Date parser.
      // utcOffset(..., true) keeps the wall-clock time and labels it +08:00.
      const date = moment(dateString, "ddd MMM DD HH:mm:ss YYYY").utcOffset("+08:00", true);
      files.push({ name: filename, date });
    }
    // Oldest first.
    files.sort((a, b) => a.date.unix() - b.date.unix());
    for (const file of files) {
      const testIndex: any[] = await this.db.query("select * from `filesRead` where filename = ?", [file.name]);
      if (testIndex.length) {
        console.error(`${this.logPrefix}File ${file.name} has already been read. Skipping.`);
        continue;
      }
      console.error(`${this.logPrefix}Reading file ${file.name}...`);
      const fileMetadata = await this.db.query("insert into `filesRead` set ?", {
        filename: file.name,
        date: file.date.format("YYYY-MM-DD HH:mm:ss")
      });
      // NOTE(review): the new row id is captured but not used yet — the file
      // contents are not fetched or stored anywhere in this method.
      const insertedFileID: number = fileMetadata.insertId;
    }
  }
}
import {readChatBuffer} from "../src/parse-buffer";
import fs from "fs";
/**
 * Manual check: parses the sample chat log under `test/data` and prints the
 * resulting messages as pretty-printed JSON.
 */
async function main() {
  const data = readChatBuffer(await fs.promises.readFile("test/data/chat_Fri Oct 02 00-03-56 2020.html"));
  console.log(JSON.stringify(data, null, 2));
}

// Report a failure and signal it through the exit code instead of leaving an
// unhandled promise rejection.
main().catch((e) => {
  console.error(e);
  process.exitCode = 1;
});
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
import { read } from "fs";
import { SMBReader } from "../src/smbreader";
/**
 * Manual check: initialises an `SMBReader`, runs it once, then exits.
 */
async function main() {
  const reader = new SMBReader("Test: ");
  await reader.init();
  await reader.run();
  // NOTE(review): presumably needed because open connections would otherwise
  // keep the process alive — confirm.
  process.exit();
}

// If init() or run() rejects, the process.exit() above is never reached;
// log the error and exit with a failing status instead.
main().catch((e) => {
  console.error(e);
  process.exit(1);
});
{
  "compilerOptions": {
    "outDir": "build",
    "module": "commonjs",
    "target": "esnext",
    "esModuleInterop": true,
    "sourceMap": true,
    "allowJs": true
  },
  "compileOnSave": true,
  "include": [
    "src/*.ts",
    "test/*.ts",
    "*.ts"
  ]
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment