Hello,
I am sorry if this issue takes up the project members' time, but I have not been able to get to the bottom of reproducing this project: I cannot obtain the same results as the paper, and I would be grateful if you could spare a moment to clarify.
According to the description of the released chatgpt-detector-roberta-chinese model, it was trained on mix-filter.
My testing procedure is the script shown below.
The final result of testing on raw-full:
2024-03-05 19:44:46,902 - testing - INFO - test_doc: {'f1': 0.9976726144297905}
This is significantly different from the numbers in the table of the original paper, so I would like to ask: is my testing procedure wrong, and if so, what is the correct way to test?
Finally, thank you in any case for your group's work.
import argparse
import os
import numpy as np
import sys
import evaluate
import pandas as pd
import torch
import logging
import torch.nn.functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm
from datasets import Dataset, concatenate_datasets
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    AutoConfig,
    BertForSequenceClassification
)
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('testing')
file_handler = logging.FileHandler('test.log')
file_handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
sys.path.append('./')
_PARSER = argparse.ArgumentParser('ptm detector')
_PARSER.add_argument('--model_name', type=str, default='/data1/xxxxxx/DeepfakeText-chinese/model/chinese-roberta-wwm-ext', help='ptm model name')
_PARSER.add_argument('--roberta_model',type=str, default='/data1/xxxxxx/DeepfakeText-chinese/model/chatgpt-detector-roberta-chinese', help='roberta_model')
_PARSER.add_argument('--test_doc', type=str, default='../../data/zh_doc_test.csv', help='input doc test file path')
_PARSER.add_argument('--test_sent', type=str, default='../../data/shuffled_zh_sent_test.csv', help='input test sent file path')
_PARSER.add_argument('--batch_size', type=int, default=16, help='batch size')
_PARSER.add_argument('--epochs', type=int, default=2, help='epochs')
_PARSER.add_argument('--num_labels', type=int, default=2, help='num_labels')
_PARSER.add_argument('--cuda', type=str, default='0', help='gpu ids, like: 1,2,3')
_PARSER.add_argument('--seed', type=int, default=42, help='random seed.')
_PARSER.add_argument('--max_length', type=int, default=365, help='max_length')
_PARSER.add_argument('--stacking', type=bool, default=True, help='stacking')
_ARGS = _PARSER.parse_args()
if len(_ARGS.cuda) > 1:
    os.environ['TOKENIZERS_PARALLELISM'] = 'false'
    os.environ['TORCH_DISTRIBUTED_DEBUG'] = 'DETAIL'
os.environ["OMP_NUM_THREADS"] = '8'
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"  # if cuda >= 10.2
os.environ['CUDA_VISIBLE_DEVICES'] = _ARGS.cuda
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def create_dataloader(args: argparse.Namespace):
    """
    Returns the dataloaders for test_doc and test_sent, in that order.
    """
    datasets = []
    files = [args.test_doc, args.test_sent]
    for file in files:
        df = pd.read_csv(file)
        dataset = Dataset.from_pandas(df)
        datasets.append(dataset)

    tokenizer = AutoTokenizer.from_pretrained(args.model_name, trust_remote_code=True)

    def tokenize_fn(example):
        # Only the 'answer' column is fed to the detector.
        return tokenizer(example['answer'], max_length=args.max_length, padding='max_length', truncation=True)

    names = ['id', 'question', 'answer', 'source']
    tokenized_datasets = []
    for dataset in datasets:
        tokenized = dataset.map(
            tokenize_fn,
            batched=True,
            remove_columns=names)
        tokenized_datasets.append(tokenized)

    def collate_fn(examples):
        return tokenizer.pad(examples, return_tensors='pt')

    dataloaders = []
    for dataset in tokenized_datasets:
        dataloader = DataLoader(dataset, shuffle=False, collate_fn=collate_fn, batch_size=args.batch_size)
        dataloaders.append(dataloader)
    return dataloaders
def eval(args, dataloaders):
    if args.stacking:
        # roberta_cnn_model = torch.load(args.roberta_cnn_model).to(device)
        # roberta_cnn_model.eval()
        # print("roberta_cnn_model loaded")
        config = AutoConfig.from_pretrained(
            args.roberta_model,
            num_labels=2,
        )
        roberta_model = BertForSequenceClassification.from_pretrained(
            args.roberta_model,
            config=config,
        ).to(device)
        # print(roberta_model.base_model)
        # exit()
        # for param in roberta_model.base_model.parameters():
        #     param.requires_grad = False
        print("roberta_model loaded")
        # roberta_rcnn_model = torch.load(args.roberta_rcnn_model).to(device)
        # roberta_rcnn_model.eval()
        # print("roberta_rcnn_model loaded")

    eval_name_list = ['test_doc', 'test_sent']
    for item, eval_name in enumerate(eval_name_list, 0):
        metric = evaluate.load("/data1/xxxxxx/DeepfakeText-chinese/dataset/metrics/f1")
        for step, batch in enumerate(tqdm(dataloaders[item], desc='Evaling', colour="green")):
            batch.to(device)
            with torch.no_grad():
                labels = batch.pop('label')
                outputs = roberta_model(**batch)['logits']
                predictions = outputs.argmax(dim=-1)
            metric.add_batch(
                predictions=predictions,
                references=labels,
            )
        eval_metric = metric.compute()
        logger.info(f"{eval_name}: {eval_metric}")
dataloaders = create_dataloader(_ARGS)
eval(_ARGS, dataloaders)
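
In case the problem is in my dataloader/padding code rather than in how I load the checkpoint, below is the minimal sanity check I would run on a couple of hand-written examples via the transformers text-classification pipeline. This is only a sketch for discussion: the model path is my local copy of the released checkpoint, the two sample strings are made up, and the pipeline will report LABEL_0 / LABEL_1 unless the checkpoint's config maps ids to label names.

from transformers import pipeline

# Local copy of the released chatgpt-detector-roberta-chinese checkpoint (same path as above).
MODEL_PATH = '/data1/xxxxxx/DeepfakeText-chinese/model/chatgpt-detector-roberta-chinese'

# The pipeline handles tokenization, truncation and softmax internally.
detector = pipeline(
    'text-classification',
    model=MODEL_PATH,
    tokenizer=MODEL_PATH,
    device=0,  # GPU 0; set device=-1 to run on CPU
)

# Two made-up inputs, one human-style and one ChatGPT-style, purely as a smoke test.
samples = [
    '这是一段人工撰写的回答示例。',
    '作为一个人工智能助手，我认为这个问题可以从以下几个方面来分析。',
]
for text, result in zip(samples, detector(samples, truncation=True, max_length=365)):
    print(text, '->', result)

If the released checkpoint behaves as expected on such a check, the discrepancy most likely comes from my evaluation script above, which is why I am asking about the intended testing procedure.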